This file is used to prepare the figures for the paper.

library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)

.libPaths()
## [1] "/usr/local/lib/R/library"

Preparation

Here are the folders where analyses are stored :

data_dir = "./.."
list.files(data_dir)
##  [1] "0_intro"      "1_metadata"   "2_individual" "3_combined"   "4_zoom"      
##  [6] "5_wu"         "6_takahashi"  "7_figures"    "LICENSE"      "README.md"   
## [11] "index.html"   "index_layout" "knit_all.sh"

These are the custom colors for cell populations :

color_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_color_markers.rds"))
color_markers = color_markers[names(color_markers) != "melanocytes"] # remove melanocytes

data.frame(cell_type = names(color_markers),
           color = unlist(color_markers)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank(),
                 axis.text.x = element_text(angle = 30, hjust = 1))

We define custom colors for sample type :

sample_type_colors = setNames(nm = c("HS", "HD"),
                              c("#C55F40", "#2C78E6"))

data.frame(cell_type = names(sample_type_colors),
           color = unlist(sample_type_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(sample_type_colors), breaks = names(sample_type_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())

We define custom colors for laboratory :

laboratory_colors = setNames(nm = c("Our", "Wu", "Takahashi"),
                             c("firebrick3", "deepskyblue", "mediumpurple"))

data.frame(cell_type = names(laboratory_colors),
           color = unlist(laboratory_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(laboratory_colors), breaks = names(laboratory_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())

We define custom colors for location :

location_colors = setNames(nm = c("axillary", "hair scalp", "pubis"),
                           c("forestgreen", "orchid3", "darkorange1"))

data.frame(cell_type = names(location_colors),
           color = unlist(location_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(location_colors), breaks = names(location_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())

We define custom colors for gender :

gender_colors = setNames(nm = c("F", "M"),
                           c("lightslateblue", "chartreuse3"))

data.frame(cell_type = names(gender_colors),
           color = unlist(gender_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(gender_colors), breaks = names(gender_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())

We set a background color :

bg_color = "gray94"

This is the correspondence between cell types and cell families, and custom colors to color cells by cell category :

custom_order_cell_type = data.frame(
  cell_type = names(color_markers),
  cell_category = c(rep("immune cells", 5),
                    rep("matrix", 5),
                    rep("non matrix", 5)),
  stringsAsFactors = FALSE)
custom_order_cell_type$cell_type = factor(custom_order_cell_type$cell_type,
                                          levels = custom_order_cell_type$cell_type)
rownames(custom_order_cell_type) = custom_order_cell_type$cell_type
custom_order_cell_type
##                                 cell_type cell_category
## CD4 T cells                   CD4 T cells  immune cells
## CD8 T cells                   CD8 T cells  immune cells
## Langerhans cells         Langerhans cells  immune cells
## macrophages                   macrophages  immune cells
## B cells                           B cells  immune cells
## cuticle                           cuticle        matrix
## cortex                             cortex        matrix
## medulla                           medulla        matrix
## IRS                                   IRS        matrix
## proliferative               proliferative        matrix
## HF-SCs                             HF-SCs    non matrix
## IFE basal                       IFE basal    non matrix
## IFE granular spinous IFE granular spinous    non matrix
## ORS                                   ORS    non matrix
## sebocytes                       sebocytes    non matrix
category_color = c("immune cells" = "slateblue1",
                   "matrix" = "mediumseagreen",
                   "non matrix" = "firebrick3")

We define markers to display on a dotplot to assess cell type annotation :

dotplot_markers = c("PTPRC",                # immune cells
                    "CD3E", "CD4",          # CD4+ T cells
                    "CD3E", "CD8A",         # CD8+ T cells
                    "CD207", "AIF1",        # Langerhans cells
                    "TREM2", "MSR1",        # macrophages
                    "CD79A", "CD79B",       # B cells
                    "MSX2",                 # matrix cells
                    "KRT32", "KRT35",       # cuticle
                    "KRT31", "PRR9",        # cortex
                    "BAMBI", "ALDH1A3",     # medulla
                    "KRT71", "KRT73",       # IRS
                    "TOP2A", "MCM5", "TK1", # cycling cells
                    "KRT14", "CXCL14",      # non-matrix cells
                    "DIO2", "TCEAL2",       # HF-SCs
                    "KRT15", "COL17A1",     # IFE basal
                    "SPINK5", "KRT1",       # ORS
                    "KRT16", "KRT6C",       # IFE granular or spinous
                    "CLMP", "PPARG")        # sebocytes

We copy-paste a subset of the markers associated with cell type annotation :

cell_markers = list(
  #  Immune cells
  "CD4 T cells" = c("CD3D", "CD3G", "CD3E", "CD52", "IL32",
                    "TRBC1", "CD4", "COTL1", "CD40LG", "GPR183", "TNFRSF1B", "FYB1", "LAT"),
  "CD8 T cells" = c("CD3D", "CD3G", "CD3E", "CD52", "IL32",
                    "CTSW", "NCR3", "CD8A", "NKG7", "KLRC1", "TRGC2", "CCL5", "ZNF683"),
  "Langerhans cells" = c("CD207", "LST1", "AIF1", "CPVL", "C15orf48", "CD1A", "CD52", "CD1E", "CD1C"),
  "macrophages" = c("LST1", "AIF1", "C3", "OLFML3", "TREM2", "A2M",
                    "MSR1", "FCGR3A", "VSIG4", "AP003481.1"),
  "B cells" = c("CD79A", "IGHG2", "IGHG3", "IGLC3", "IGHG1", "IGHM", "MS4A1", "IGLC2",
                "IGHGP", "BANK1", "IGHG4", "IGKC", "TNFRSF13C", "CD79B", "KLF2", "GZMB"),
  # Matrix
  "cuticle" = c("KRT32", "CYSTM1", "MT4", "KRT35", "VSNL1", "NOTCH1"),
  "cortex" = c("C10orf99", "CRYAB", "HEPHL1", "ALOX15B", "EFHD1", "DSG4", "DNASE1L2", "TGM3", "KRT31"),
  "medulla" = c("BAMBI", "SLC7A8", "EDNRA", "IGFBP2", "KRT25", "KITLG"),
  "IRS" = c("KRT71", "KRT27", "KRT28", "CTSC", "GATA3", "KRT73", "SCEL"),
  "proliferative" = c("TOP2A", "MKI67", "BIRC5", "TK1", "MCM5"),
  # Non-matrix
  "HF-SCs" = c("KRT15", "DST", "DIO2", "TCEAL2", "SFRP1", "TNC", "WIF1", "FRZB", "LHX2", "COL17A1"),
  "IFE basal" = c("GPX2", "MOXD1", "C19orf48", "ALDH3A1", "CDH13", "LAMB3", "CAVIN1"),
  "IFE granular spinous" = c("DEFB1", "LY6D", "EHF", "PDZK1IP1", "S100A7", "FGFBP1", "SPINK5", "KRT1", "KRTDAP"),
  "ORS" = c("KRT16", "KRT6B", "KRT6C", "TM4SF1", "APOC1", "CBLN2", "HES4", "LYPD3"),
  # other
  "melanocytes" = c("DCT", "KIT", "MLANA", "GPM6B", "EDNRB", "PMEL", "IGFBP7", "MITF", "ZEB2", "APOD"),
  "sebocytes" = c("CLMP", "PPARG", "ACSBG1", "MGST1", "SAA1", "APMAP"))

lengths(cell_markers)
##          CD4 T cells          CD8 T cells     Langerhans cells 
##                   13                   13                    9 
##          macrophages              B cells              cuticle 
##                   10                   16                    6 
##               cortex              medulla                  IRS 
##                    9                    6                    7 
##        proliferative               HF-SCs            IFE basal 
##                    5                   10                    7 
## IFE granular spinous                  ORS          melanocytes 
##                    9                    8                   10 
##            sebocytes 
##                    6

Custom functions to display gene expression on the heatmap :

color_fun = function(one_gene) {
  gene_range = range(ht_annot[, one_gene])
  gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
                                      breaks = seq(from = gene_range[1], to = gene_range[2],
                                                   length.out = length(aquarius::color_gene)))
  return(gene_palette)
}

Load datasets

In a list, we load:

  • Seurat object
  • name of the 2D projection to visualize cells on
  • various third element such as: sample information, trajectory, results from gene analysis…
list_data = list()

## Our dataset
# Atlas of all cells
list_data$our_atlas$sobj = readRDS(paste0(data_dir, "/3_combined/hs_hd_sobj.rds"))
list_data$our_atlas$name2D = "harmony_38_tsne"
list_data$our_atlas$sample_info = readRDS(paste0(data_dir, "/1_metadata/hs_hd_sample_info.rds"))
list_data$our_atlas$sobj
## An object of class Seurat 
## 20003 features across 12111 samples within 1 assay 
## Active assay: RNA (20003 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_38_tsne, RNA_pca_38_umap, harmony, harmony_38_umap, harmony_38_tsne
# ORS + IFEb dataset
list_data$ors_ifeb$sobj = readRDS(paste0(data_dir, "/4_zoom/3_zoom_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$ors_ifeb$name2D = "harmony_20_tsne"
list_data$ors_ifeb$list_results = readRDS(paste0(data_dir, "/4_zoom/3_zoom_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$ors_ifeb$sobj
## An object of class Seurat 
## 16683 features across 3416 samples within 1 assay 
## Active assay: RNA (16683 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne
# HF-SCs dataset
list_data$hfsc$sobj = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_sobj.rds"))
list_data$hfsc$name2D = "harmony_24_tsne"
list_data$hfsc$list_results = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_list_results.rds"))
list_data$hfsc$sobj
## An object of class Seurat 
## 15384 features across 1454 samples within 1 assay 
## Active assay: RNA (15384 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_24_tsne, RNA_pca_24_umap, harmony, harmony_24_umap, harmony_24_tsne
# Non matrix cells dataset
list_data$non_matrix$sobj = readRDS(paste0(data_dir, "/4_zoom/5_zoom_non_matrix/non_matrix_sobj_traj_tinga.rds"))
list_data$non_matrix$name2D = "harmony_dm"
list_data$non_matrix$my_traj = readRDS(paste0(data_dir, "/4_zoom/5_zoom_non_matrix/non_matrix_my_traj_tinga.rds"))
list_data$non_matrix$sobj
## An object of class Seurat 
## 17837 features across 5599 samples within 1 assay 
## Active assay: RNA (17837 features, 2000 variable features)
##  8 dimensional reductions calculated: RNA_pca, RNA_pca_18_tsne, RNA_pca_18_umap, harmony, harmony_18_umap, harmony_18_tsne, harmony_dm, harmony_dm_5_umap
# Matrix cells dataset (not considered in the manuscript)
# list_data$matrix$sobj = readRDS(paste0(data_dir, "/4_zoom/6_zoom_matrix/matrix_sobj.rds"))
# list_data$matrix$name2D = "harmony_19_tsne"
# list_data$matrix$list_results = readRDS(paste0(data_dir, "/4_zoom/6_zoom_matrix/matrix_list_results.rds"))
# list_data$matrix$sobj

# Immune cells
list_data$immune$sobj = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_sobj.rds"))
list_data$immune$name2D = "harmony_20_tsne"
list_data$immune$list_results = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_list_results.rds"))
list_data$immune$sobj
## An object of class Seurat 
## 15121 features across 2329 samples within 1 assay 
## Active assay: RNA (15121 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne, RNA_pca_20_umap, harmony, harmony_20_umap, harmony_20_tsne
## Wu dataset (OEP002321)
# Atlas of all cells
list_data$wu_atlas$sobj = readRDS(paste0(data_dir, "/5_wu/3_combined/wu_sobj.rds"))
list_data$wu_atlas$name2D = "harmony_39_tsne"
list_data$wu_atlas$sample_info = readRDS(paste0(data_dir, "/5_wu/1_metadata/wu_sample_info.rds"))

# ORS + IFEb dataset
list_data$wu_ors_ifeb$sobj = readRDS(paste0(data_dir, "/5_wu/4_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$wu_ors_ifeb$name2D = "harmony_20_tsne"
list_data$wu_ors_ifeb$list_results = readRDS(paste0(data_dir, "/5_wu/4_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$wu_ors_ifeb$sobj
## An object of class Seurat 
## 15543 features across 7987 samples within 1 assay 
## Active assay: RNA (15543 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_19_tsne, RNA_pca_19_umap, harmony, harmony_19_umap, harmony_19_tsne
## Takahashi dataset (GSE129611)
# Atlas of all cells
list_data$takahashi_atlas$sobj = readRDS(paste0(data_dir, "/6_takahashi/3_combined/takahashi_sobj.rds"))
list_data$takahashi_atlas$name2D = "harmony_41_tsne"
list_data$takahashi_atlas$sample_info = readRDS(paste0(data_dir, "/6_takahashi/1_metadata/takahashi_sample_info.rds"))
list_data$takahashi_atlas$sobj
## An object of class Seurat 
## 18588 features across 5636 samples within 1 assay 
## Active assay: RNA (18588 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_41_tsne, RNA_pca_41_umap, harmony, harmony_41_umap, harmony_41_tsne
# ORS + IFEb dataset
list_data$takahashi_ors_ifeb$sobj = readRDS(paste0(data_dir, "/6_takahashi/4_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$takahashi_ors_ifeb$name2D = "harmony_23_tsne"
list_data$takahashi_ors_ifeb$list_results = readRDS(paste0(data_dir, "/6_takahashi/4_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$takahashi_ors_ifeb$sobj
## An object of class Seurat 
## 14911 features across 1562 samples within 1 assay 
## Active assay: RNA (14911 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_23_tsne, RNA_pca_23_umap, harmony, harmony_23_umap, harmony_23_tsne
## Three datasets combined (Our + OEP002321 + GSE129611)
list_data$combined$sobj = readRDS(paste0(data_dir, "/3_combined/data3_sobj.rds"))
list_data$combined$name2D = "harmony_37_tsne"
list_data$combined$sobj
## An object of class Seurat 
## 19849 features across 35289 samples within 1 assay 
## Active assay: RNA (19849 features, 2000 variable features)
##  6 dimensional reductions calculated: RNA_pca, RNA_pca_37_tsne, RNA_pca_37_umap, harmony, harmony_37_umap, harmony_37_tsne
## Print
str(list_data, max.level = 2)
## List of 10
##  $ our_atlas         :List of 3
##   ..$ sobj       :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D     : chr "harmony_38_tsne"
##   ..$ sample_info:'data.frame':  7 obs. of  8 variables:
##  $ ors_ifeb          :List of 3
##   ..$ sobj        :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D      : chr "harmony_20_tsne"
##   ..$ list_results:List of 4
##  $ hfsc              :List of 3
##   ..$ sobj        :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D      : chr "harmony_24_tsne"
##   ..$ list_results:List of 11
##  $ non_matrix        :List of 3
##   ..$ sobj   :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D : chr "harmony_dm"
##   ..$ my_traj:List of 18
##   .. ..- attr(*, "class")= chr [1:4] "dynwrap::with_dimred" "dynwrap::with_trajectory" "dynwrap::data_wrapper" "list"
##  $ immune            :List of 3
##   ..$ sobj        :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D      : chr "harmony_20_tsne"
##   ..$ list_results:List of 4
##  $ wu_atlas          :List of 3
##   ..$ sobj       :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D     : chr "harmony_39_tsne"
##   ..$ sample_info:'data.frame':  6 obs. of  8 variables:
##  $ wu_ors_ifeb       :List of 3
##   ..$ sobj        :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D      : chr "harmony_20_tsne"
##   ..$ list_results:List of 1
##  $ takahashi_atlas   :List of 3
##   ..$ sobj       :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D     : chr "harmony_41_tsne"
##   ..$ sample_info:'data.frame':  5 obs. of  8 variables:
##  $ takahashi_ors_ifeb:List of 3
##   ..$ sobj        :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D      : chr "harmony_23_tsne"
##   ..$ list_results:List of 1
##  $ combined          :List of 2
##   ..$ sobj  :Formal class 'Seurat' [package "Seurat"] with 12 slots
##   ..$ name2D: chr "harmony_37_tsne"

Figures

Sample information

Our dataset:

sobj = list_data$our_atlas$sobj
sample_info = list_data$our_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))

Wu dataset:

sobj = list_data$wu_atlas$sobj
sample_info = list_data$wu_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))

Takahashi dataset:

sobj = list_data$takahashi_atlas$sobj
sample_info = list_data$takahashi_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))

Annotation smoothing

For each dataset, we smooth the single-cell level cell type annotation at a cluster level.

  • Our dataset:
# Select dataset
sobj = list_data$our_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$our_atlas$sobj = sobj
# Select dataset
sobj = list_data$non_matrix$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$non_matrix$sobj = sobj
# Select dataset
sobj = list_data$immune$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$immune$sobj = sobj
  • Wu dataset:
# Select dataset
sobj = list_data$wu_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$wu_atlas$sobj = sobj
  • Takahashi dataset:
# Select dataset
sobj = list_data$takahashi_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$takahashi_atlas$sobj = sobj
  • Combined dataset:
# Select dataset
sobj = list_data$combined$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$combined$sobj = sobj

For the combined dataset, we also define the cell category:

# Select dataset
sobj = list_data$combined$sobj

# Define cell category
sobj$cell_category = custom_order_cell_type[sobj$cell_type, "cell_category"]
table(sobj$cell_type, sobj$cell_category)
##                       
##                        immune cells matrix non matrix
##   CD4 T cells                  1059      0          0
##   CD8 T cells                   652      0          0
##   Langerhans cells              461      0          0
##   macrophages                   571      0          0
##   B cells                       207      0          0
##   cuticle                         0   3021          0
##   cortex                          0   2163          0
##   medulla                         0    903          0
##   IRS                             0   1152          0
##   proliferative                   0   2590          0
##   HF-SCs                          0      0       3292
##   IFE basal                       0      0       4117
##   IFE granular spinous            0      0       5349
##   ORS                             0      0       9121
##   sebocytes                       0      0        631
# Consider change
list_data$combined$sobj = sobj

Atlas

Our dataset

We select the dataset:

sobj = list_data$our_atlas$sobj
name2D = list_data$our_atlas$name2D
sample_info = list_data$our_atlas$sample_info
  • sample identifier
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord$location = sobj$location
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • location
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = location)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = location_colors,
                              breaks = names(location_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • dotplot for cell type annotation
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list

  • barplot with proportion of cells for cell types of interest, by sample ID
# Cells of interest
cells_oi = c("CD4 T cells", "macrophages")

# Proportion of cells by sample ID
df_proportion = table(sobj$cluster_type,
                      sobj$sample_identifier) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("cluster_type", "sample_identifier", "freq")) %>%
  dplyr::filter(cluster_type %in% cells_oi) %>%
  dplyr::mutate(cluster_type = factor(cluster_type, levels = cells_oi)) %>%
  dplyr::mutate(freq = 100*freq) # percentage

# Plot
ggplot2::ggplot(df_proportion, aes(x = sample_identifier,
                                   y = freq,
                                   fill = cluster_type)) +
  ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(),
                    color = "black", width = 0.7) +
  ggplot2::labs(y = "% of cells by sample") +
  ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0,14)) +
  ggplot2::scale_fill_manual(values = color_markers[cells_oi],
                             breaks = cells_oi,
                             name = "Cell Type") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.line.x = element_line(color = "gray50"),
                 panel.grid.major.y = element_line(),
                 panel.grid.minor.y = element_line(),
                 text = element_text(size = 15))

Wu dataset

We select the dataset:

sobj = list_data$wu_atlas$sobj
name2D = list_data$wu_atlas$name2D
sample_info = list_data$wu_atlas$sample_info
  • sample identifier
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • dotplot for cell type annotation
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list

Takahashi dataset

We select the dataset:

sobj = list_data$takahashi_atlas$sobj
name2D = list_data$takahashi_atlas$name2D
sample_info = list_data$takahashi_atlas$sample_info
  • sample identifier
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • dotplot for cell type annotation
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list

Combined datasets

We select the dataset:

sobj = list_data$combined$sobj
name2D = list_data$combined$name2D
sample_info = rbind(list_data$our_atlas$sample_info,
                    list_data$wu_atlas$sample_info,
                    list_data$takahashi_atlas$sample_info)
  • sample identifier
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord$location = sobj$location
cells_coord$laboratory = sobj$laboratory
cells_coord$gender = dplyr::left_join(sobj@meta.data,
                                      sample_info,
                                      by = "project_name")[, "gender"]
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • location
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = location)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = location_colors,
                              breaks = names(location_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • laboratory
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = laboratory)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = laboratory_colors,
                              breaks = names(laboratory_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • gender
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = gender)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = gender_colors,
                              breaks = names(gender_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • split by laboratory:
plot_list = aquarius::plot_split_dimred(sobj,
                                        reduction = name2D,
                                        split_by = "laboratory",
                                        group_by = "cluster_type",
                                        group_color = color_markers,
                                        order = TRUE,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list, nrow = 1) +
  patchwork::plot_layout(guides = "collect") &
  Seurat::NoLegend()

  • gene to assess global annotation
plot_list = lapply(c("PTPRC", "MSX2", "KRT14"), FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene, reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

plot_list
## [[1]]

## 
## [[2]]

## 
## [[3]]

  • violin plot, split by cell type, split by laboratory

for non matrix cells

category_of_interest = "non matrix"

# Select cells of interest
cell_type_of_interest = as.character(custom_order_cell_type[
  which(custom_order_cell_type$cell_category == category_of_interest), "cell_type"])

subsobj_of_interest = subset(sobj, cell_type %in% cell_type_of_interest)
subsobj_of_interest$cell_type_labo = paste0(subsobj_of_interest$cell_type,
                                            subsobj_of_interest$laboratory)

cell_markers_of_interest = cell_markers[cell_type_of_interest]

total_height = max(lengths(cell_markers_of_interest)) + 1

# Violin plot
patchwork_list = lapply(names(cell_markers_of_interest), FUN = function(cell_type) {
  markers_set = cell_markers_of_interest[[cell_type]]
  
  # Individual violin plot
  sub_plot_list = lapply(markers_set, FUN = function(one_gene) {
    p = Seurat::VlnPlot(subsobj_of_interest,
                        features = one_gene, pt.size = 0,
                        group.by = "cell_type",
                        split.by = "laboratory",
                        cols = laboratory_colors,
                        combine = FALSE)[[1]] +
      ggplot2::labs(y = one_gene) +
      ggplot2::scale_y_continuous(expand = c(0.01, 0)) +
      ggplot2::geom_vline(mapping = NULL, data = NULL,
                          xintercept = c(1:4) + 0.5,
                          col = "gray50", lty = 2) +
      Seurat::NoLegend() +
      ggplot2::theme(plot.title = element_blank(),
                     axis.title.x = element_blank(), # "Identity"
                     axis.line.x = element_blank(),  # replaced by panel.border
                     axis.title.y = element_text(size = 8, angle = 0, vjust = 0.5), # gene
                     axis.text.y = element_blank(), # "Expression level"
                     axis.text.x = element_blank(), # cell type
                     axis.ticks = element_blank(),
                     plot.margin = unit(c(0,0,0,0), "cm"),
                     panel.border = element_rect(size = 1, color = "gray50"))
    return(p)
  })
  
  # Reset axis.text for last plot
  # sub_plot_list[[length(sub_plot_list)]] = sub_plot_list[[length(sub_plot_list)]] +
  #   ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) # cell type
  
  # Add empty plot to arrange heights
  sub_plot_list[[length(sub_plot_list) + 1]] = patchwork::plot_spacer()
  empty_plot_height = total_height - length(markers_set)
  
  # Arrange as patchwork
  pp = patchwork::wrap_plots(sub_plot_list,
                             ncol = 1,
                             heights = c(rep(1, length(markers_set)),
                                         empty_plot_height)) +
    patchwork::plot_annotation(title = cell_type)
  
  return(pp)
})
names(patchwork_list) = names(cell_markers_of_interest)

patchwork::wrap_plots(patchwork_list, ncol = 1)

for matrix cells

category_of_interest = "matrix"

# Select cells of interest
cell_type_of_interest = as.character(custom_order_cell_type[
  which(custom_order_cell_type$cell_category == category_of_interest), "cell_type"])

subsobj_of_interest = subset(sobj, cell_type %in% cell_type_of_interest)
subsobj_of_interest$cell_type_labo = paste0(subsobj_of_interest$cell_type,
                                            subsobj_of_interest$laboratory)

cell_markers_of_interest = cell_markers[cell_type_of_interest]

total_height = max(lengths(cell_markers_of_interest)) + 1

# Violin plot
patchwork_list = lapply(names(cell_markers_of_interest), FUN = function(cell_type) {
  markers_set = cell_markers_of_interest[[cell_type]]
  
  # Individual violin plot
  sub_plot_list = lapply(markers_set, FUN = function(one_gene) {
    p = Seurat::VlnPlot(subsobj_of_interest,
                        features = one_gene, pt.size = 0,
                        group.by = "cell_type",
                        split.by = "laboratory",
                        cols = laboratory_colors,
                        combine = FALSE)[[1]] +
      ggplot2::labs(y = one_gene) +
      ggplot2::scale_y_continuous(expand = c(0.01, 0)) +
      ggplot2::geom_vline(mapping = NULL, data = NULL,
                          xintercept = c(1:4) + 0.5,
                          col = "gray50", lty = 2) +
      Seurat::NoLegend() +
      ggplot2::theme(plot.title = element_blank(),
                     axis.title.x = element_blank(), # "Identity"
                     axis.line.x = element_blank(),  # replaced by panel.border
                     axis.title.y = element_text(size = 8, angle = 0, vjust = 0.5), # gene
                     axis.text.y = element_blank(), # "Expression level"
                     axis.text.x = element_blank(), # cell type
                     axis.ticks = element_blank(),
                     plot.margin = unit(c(0,0,0,0), "cm"),
                     panel.border = element_rect(size = 1, color = "gray50"))
    return(p)
  })
  
  # Reset axis.text for last plot
  # sub_plot_list[[length(sub_plot_list)]] = sub_plot_list[[length(sub_plot_list)]] +
  #   ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) # cell type
  
  # Add empty plot to arrange heights
  sub_plot_list[[length(sub_plot_list) + 1]] = patchwork::plot_spacer()
  empty_plot_height = total_height - length(markers_set)
  
  # Arrange as patchwork
  pp = patchwork::wrap_plots(sub_plot_list,
                             ncol = 1,
                             heights = c(rep(1, length(markers_set)),
                                         empty_plot_height)) +
    patchwork::plot_annotation(title = cell_type)
  
  return(pp)
})
names(patchwork_list) = names(cell_markers_of_interest)

patchwork::wrap_plots(patchwork_list, ncol = 1)

Non-matrix cells

We select the datasets:

sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$non_matrix$sobj
name2D = list_data$non_matrix$name2D
my_traj = list_data$non_matrix$my_traj

cell_type_of_interest = c("HF-SCs", "ORS", "IFE basal", "IFE granular spinous")

Location of cells on the whole atlas:

sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cluster_type")],
                                             by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

  • sample type
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "sample_type", cols = sample_type_colors) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • pseudotime
Seurat::FeaturePlot(sobj, reduction = name2D, pt.size = 0.5,
                    features = "pseudotime") +
  ggplot2::scale_color_gradientn(colors = viridis::viridis(n = 100)) +
  ggplot2::lims(x = range(sobj@reductions[[name2D]]@cell.embeddings[, 1]),
                y = range(sobj@reductions[[name2D]]@cell.embeddings[, 2])) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank(),
                 legend.position = "bottom",
                 legend.direction = "horizontal") +
  Seurat::NoAxes()

  • pseudotime with dynplot’s function :
dynplot::plot_dimred(trajectory = my_traj,
                     dimred = sobj[[name2D]]@cell.embeddings,
                     # Cells
                     color_cells = 'pseudotime',
                     size_cells = 0.5,
                     border_radius_percentage = 0,
                     # Trajectory
                     plot_trajectory = TRUE,
                     color_trajectory = "none",
                     label_milestones = FALSE,
                     size_milestones = 0,
                     size_transitions = 1)

  • violin plots
features_oi = c("SOX9", "TCF4", "TBX1", "NFATC1", 
                "LHX2", "RUNX1", "VIM", "TCF3", "FGF18", "CD34",
                # HF-SCs + IFE basal
                "COL17A1", "MT2A", "TXNIP", "CAVIN1",
                # HF-SCs + ORS
                "TUBB2A", "KRT6B", "MARCKSL1", "ID4", "CYP1B1", "BARX2",
                # HF-SCs
                "LMCD1", "TCEAL2", "FRZB", "THBS1", "DIO2")

plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::VlnPlot(sobj, group.by = "cluster_type",
                      features = one_gene, pt.size = 0.001) +
    ggplot2::scale_fill_manual(values = color_markers[levels(sobj$cluster_type)],
                               breaks = levels(sobj$cluster_type)) +
    ggplot2::theme(plot.subtitle = element_text(hjust = 0.5),
                   axis.title.x = element_blank(),
                   legend.position = "none")
  return(p)
})

names(plot_list) = features_oi
plot_list
## $SOX9

## 
## $TCF4

## 
## $TBX1

## 
## $NFATC1

## 
## $LHX2

## 
## $RUNX1

## 
## $VIM

## 
## $TCF3

## 
## $FGF18

## 
## $CD34

## 
## $COL17A1

## 
## $MT2A

## 
## $TXNIP

## 
## $CAVIN1

## 
## $TUBB2A

## 
## $KRT6B

## 
## $MARCKSL1

## 
## $ID4

## 
## $CYP1B1

## 
## $BARX2

## 
## $LMCD1

## 
## $TCEAL2

## 
## $FRZB

## 
## $THBS1

## 
## $DIO2

  • feature plots
plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene, reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::lims(x = range(sobj@reductions[[name2D]]@cell.embeddings[, 1]),
                  y = range(sobj@reductions[[name2D]]@cell.embeddings[, 2])) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

names(plot_list) = features_oi
plot_list
## $SOX9

## 
## $TCF4

## 
## $TBX1

## 
## $NFATC1

## 
## $LHX2

## 
## $RUNX1

## 
## $VIM

## 
## $TCF3

## 
## $FGF18

## 
## $CD34

## 
## $COL17A1

## 
## $MT2A

## 
## $TXNIP

## 
## $CAVIN1

## 
## $TUBB2A

## 
## $KRT6B

## 
## $MARCKSL1

## 
## $ID4

## 
## $CYP1B1

## 
## $BARX2

## 
## $LMCD1

## 
## $TCEAL2

## 
## $FRZB

## 
## $THBS1

## 
## $DIO2

ORS vs IFE basal

Our dataset

We select the dataset:

sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$ors_ifeb$sobj
name2D = list_data$ors_ifeb$name2D
list_results = list_data$ors_ifeb$list_results

Location of cells on the whole atlas:

sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

DE genes between ORS and IFE basal :

mark = list_results$ORS_vs_IFE_basal$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in IFE basal
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for IFE basal
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in ORS",
                y = "Enriched in IFE basal") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)

  • GSEA, enriched in ORS compared to IFE basal
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

  • GSEA, enriched in IFE basal compared to ORS
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

  • violin plots of DE genes in ORS, between HS and HD
subsobj = subset(sobj, cell_type == "ORS")
features_oi = c("DUSP1", "DDIT4", "GABARAP", "ZNF302",
                "MIF", "LGALS7", "ARF5", "S100A9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
## $DUSP1

## 
## $DDIT4

## 
## $GABARAP

## 
## $ZNF302

## 
## $MIF

## 
## $LGALS7

## 
## $ARF5

## 
## $S100A9

  • violin plots of DE genes in IFE basal, between HS and HD
subsobj = subset(sobj, cell_type == "IFE basal")
features_oi = c("DUSP1", "KLF6", "CLDN1", "CTGF",
                "IFI27", "IFITM3", "CCL2", "S100A9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
## $DUSP1

## 
## $KLF6

## 
## $CLDN1

## 
## $CTGF

## 
## $IFI27

## 
## $IFITM3

## 
## $CCL2

## 
## $S100A9

Wu dataset

We select the dataset:

sobj_atlas = list_data$wu_atlas$sobj
name2D_atlas = list_data$wu_atlas$name2D
sobj = list_data$wu_ors_ifeb$sobj
name2D = list_data$wu_ors_ifeb$name2D
list_results = list_data$wu_ors_ifeb$list_results

Location of cells on the whole atlas:

sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

  • GSEA, enriched in ORS compared to IFE basal
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

  • GSEA, enriched in IFE basal compared to ORS
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

Takahashi dataset

We select the dataset:

sobj_atlas = list_data$takahashi_atlas$sobj
name2D_atlas = list_data$takahashi_atlas$name2D
sobj = list_data$takahashi_ors_ifeb$sobj
name2D = list_data$takahashi_ors_ifeb$name2D
list_results = list_data$takahashi_ors_ifeb$list_results

Location of cells on the whole atlas:

sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

  • GSEA, enriched in ORS compared to IFE basal
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

  • GSEA, enriched in IFE basal compared to ORS
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))

HF-SCs

We select the dataset:

sobj = list_data$hfsc$sobj
name2D = list_data$hfsc$name2D
list_results = list_data$hfsc$list_results
  • violin plots of DE genes in HF-SCs, between HS and HD
features_oi = c("HLA-C", "HLA-A", "IFITM3", "MIF", "JUNB", "PPIA",
                "PDK4", "DDIT4", "BTG2", "TXNIP", "KLF9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = sobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[sobj$sample_type == "HS"]
  feature_hd = feature_expr[sobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(sobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
## $`HLA-C`

## 
## $`HLA-A`

## 
## $IFITM3

## 
## $MIF

## 
## $JUNB

## 
## $PPIA

## 
## $PDK4

## 
## $DDIT4

## 
## $BTG2

## 
## $TXNIP

## 
## $KLF9

* heatmap

Heatmap between HS and HD :

# features_oi = list_results$HS_vs_HD %>%
#   dplyr::filter(pct.1 > 0.5 | pct.2 > 0.5) %>%
#   dplyr::arrange(-avg_logFC, -pct.1, -pct.2) %>%
#   rownames()
# features_oi = features_oi[!grepl(features_oi, pattern = "^RP")]

features_oi = c("IFITM3", "MIF", "HLA-C", "LMCD1", "TGFB2", "CYP1B1",
                "KRT6B", "WIF1", "HSPA1A", "COMT", "CISD1", "PRNP",  "CD81",
                "HLA-A", "B2M", "PPIA", "PYCARD", "CSAD", "PFN1", "HIF1A",
                "BASP1", "CAV1", "ZNF302", "LMO4", "SEPT11", "HTRA1",
                "GPM6A", "SOX4", "ZFP36L2", "RHOB", "CLDN1", "DUSP1", "TBX1",
                "ATF3", "DDIT4", "TXNIP", "BTG2", "SGK1", "KLF6", "LGR5")

# Matrix
mat_expr = Seurat::GetAssayData(sobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = cbind(mat_expr, sobj$percent.mt)
colnames(mat_expr)[ncol(mat_expr)] = "percent.rb"
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]   41 1454
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# list_colors[["seurat_clusters"]] = setNames(aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))),
#                                             nm = levels(sobj$seurat_clusters))
# Cells order
column_order = sobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(sobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = sobj$sample_type,
                           sample_identifier = sobj$sample_identifier,
                           # clusters = sobj$seurat_clusters,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]
                                      # clusters = list_colors[["seurat_clusters"]]
                           ))


# g1 : REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
# g2 : GOBP_APOPTOTIC_PROCESS
g1_genes = c("B2M", "HLA-C", "HLA-A", "MIF", "PPIA", "JUNB", "IFITM3")
g2_genes = c("JUN", "ATF3", "BTG2", "RHOB", "NFKBIA", "SGK1", "KLF9",
             "CAV1", "DDIT4", "PDK4", "TXNIP", "RNF1152", "TLE1")
ha_right = data.frame(genes =  c(features_oi, "percent.rb"), rownames = c(features_oi, "percent.rb"))
ha_right$group = case_when(ha_right$genes %in% g1_genes ~ "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
                           ha_right$genes %in% g2_genes ~ "GOBP_APOPTOTIC_PROCESS",
                           TRUE ~ "others")

list_colors[["group"]] = setNames(
  nm = c("REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM", "GOBP_APOPTOTIC_PROCESS", "others"),
  c("red", "black", "gray90"))

ha_right = HeatmapAnnotation(group = ha_right$group,
                             col = list(group = list_colors[["group"]]),
                             which = "row",
                             show_annotation_name = FALSE,
                             show_legend = TRUE)

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             right_annotation = ha_right,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             show_row_dend = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 10, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")

Immune cells

We select the datasets:

sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$immune$sobj
name2D = list_data$immune$name2D

cell_type_of_interest = c("CD4 T cells", "CD8 T cells", "Langerhans cells",
                          "macrophages", "B cells", "proliferative")

Location of cells on the whole atlas:

sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cluster_type")],
                                             by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()

  • cell type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • cluster type annotation
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()

  • feature plots for genes related to TCR alpha
features_oi = c("CD3E", "CD4", "CD8A",
                sort(grep(rownames(sobj), pattern = "^TRA[C|V]", value = TRUE)))

plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene,
                          reduction = name2D, order = TRUE) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

patchwork::wrap_plots(plot_list, nrow = 4)

  • feature plots split by sample type (HS / HD)
features_oi = data.frame(population = c("all immune cells", rep("macrophages", 2),
                                      rep("CD4 T cells", 3), rep("CD8 T cells", 2)),
                         feature = c("IL6", "IL1B", "TNF", "GZMA", "IFNG", "IL17A", "PRF1", "GZMB"))
features_oi
##         population feature
## 1 all immune cells     IL6
## 2      macrophages    IL1B
## 3      macrophages     TNF
## 4      CD4 T cells    GZMA
## 5      CD4 T cells    IFNG
## 6      CD4 T cells   IL17A
## 7      CD8 T cells    PRF1
## 8      CD8 T cells    GZMB
patchwork_list = lapply(features_oi$feature, FUN = function(one_gene) {
  plot_list = aquarius::plot_split_dimred(sobj,
                                          reduction = name2D,
                                          split_by = "sample_type",
                                          color_by = one_gene,
                                          order = TRUE,
                                          bg_color = bg_color)
  
  p = patchwork::wrap_plots(plot_list, nrow = 1) +
    patchwork::plot_layout(guides = "collect")
  
  return(p)
})
names(patchwork_list) = features_oi$feature

patchwork_list
## $IL6

## 
## $IL1B

## 
## $TNF

## 
## $GZMA

## 
## $IFNG

## 
## $IL17A

## 
## $PRF1

## 
## $GZMB

  • violin plots within a specific population
plot_list = lapply(c(1:nrow(features_oi)), FUN = function(i) {
  one_gene = features_oi$feature[i]
  population = features_oi$population[i]
  
  # Subset object
  if (population != "all immune cells") {
    subsobj = subset(sobj, cluster_type %in% population)
  } else {
   subsobj = sobj 
  }
  
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[one_gene, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = one_gene) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    ggplot2::labs(title = population,
                  subtitle = one_gene) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   plot.title = element_text(size = 10),
                   plot.subtitle = element_text(hjust = 0.5),
                   legend.position = "none")
  
  return(p)
})
names(plot_list) = features_oi$feature

plot_list
## $IL6

## 
## $IL1B

## 
## $TNF

## 
## $GZMA

## 
## $IFNG

## 
## $IL17A

## 
## $PRF1

## 
## $GZMB

  • heatmap macrophages
subsobj = subset(sobj, cluster_type == "macrophages")
features_oi = c("IL1B", "TNF",
                "HLA-DQA2", "HLA-DPA1", "HLA-DRB5",
                "HLA-A", "HLA-C", "B2M",
                "C1QA", "C1QB", "C1QC")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]  11 463
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")

  • heatmap T cells
subsobj = subset(sobj, cluster_type == "CD4 T cells")
features_oi = c("GZMA", "KLRB1", "BTG1", "ZFP36", "NFKBIA", "TXNIP", "CXCR4", "IFNG", "IL17A")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## [1]   9 848
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "left",
                     annotation_legend_side = "left")

Supplementary tables

In this section, we save files to associate with the manuscript, as supplementary tables.

Table S2 (package version)

We load the table :

package_version = read.table(paste0(".", "/data/info_to_install_2023_04_17.txt"),
                             header = TRUE)
head(package_version)
##   order package_name  version
## 1     1      acepack    1.4.1
## 2     2    ADGofTest      0.3
## 3     3    backports    1.2.1
## 4     4    base64enc    0.1-3
## 5     5           BH 1.72.0-3
## 6     6          bit    4.0.4
##                                                                              url
## 1                    https://cran.r-project.org/src/contrib/acepack_1.4.1.tar.gz
## 2                    https://cran.r-project.org/src/contrib/ADGofTest_0.3.tar.gz
## 3 http://cran.r-project.org/src/contrib/Archive/backports/backports_1.2.1.tar.gz
## 4                  https://cran.r-project.org/src/contrib/base64enc_0.1-3.tar.gz
## 5            http://cran.r-project.org/src/contrib/Archive/BH/BH_1.72.0-3.tar.gz
## 6             http://cran.r-project.org/src/contrib/Archive/bit/bit_4.0.4.tar.gz
##           type
## 1    CRAN_last
## 2    CRAN_last
## 3 CRAN_archive
## 4    CRAN_last
## 5 CRAN_archive
## 6 CRAN_archive

Columns correspond to :

  • order : order to install package such that one never need to install dependencies
  • package_name : package name
  • version : installed version, on the local machine
  • url : the package was installed in the Singularity container using this url

We save this table, except the last column (just for me) :

openxlsx::write.xlsx(package_version[, c(1:4)],
                     file = paste0(".", "/data/Supplementary Table 2.xlsx"))

Table S3 (cell type annotation)

We load the table :

cell_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
##          CD4 T cells          CD8 T cells     Langerhans cells 
##                   13                   13                    9 
##          macrophages              B cells              cuticle 
##                   10                   16                   15 
##               cortex              medulla                  IRS 
##                   16                   10                   16 
##        proliferative               HF-SCs            IFE basal 
##                   20                   17                   16 
## IFE granular spinous                  ORS          melanocytes 
##                   17                   15                   10 
##            sebocytes 
##                    8

We save this table, except the last column (just for me) :

openxlsx::write.xlsx(cell_markers,
                     file = paste0(".", "/data/Supplementary Table 3.xlsx"))

R Session

show
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] ComplexHeatmap_2.14.0 ggplot2_3.3.5         patchwork_1.1.2      
## [4] dplyr_1.0.7          
## 
## loaded via a namespace (and not attached):
##   [1] softImpute_1.4              graphlayouts_0.7.0         
##   [3] pbapply_1.4-2               lattice_0.20-41            
##   [5] haven_2.3.1                 dyndimred_1.0.3            
##   [7] vctrs_0.3.8                 usethis_2.0.1              
##   [9] dynwrap_1.2.1               blob_1.2.1                 
##  [11] survival_3.2-13             prodlim_2019.11.13         
##  [13] dynutils_1.0.5              later_1.3.0                
##  [15] DBI_1.1.1                   R.utils_2.11.0             
##  [17] SingleCellExperiment_1.8.0  rappdirs_0.3.3             
##  [19] uwot_0.1.8                  dqrng_0.2.1                
##  [21] jpeg_0.1-8.1                zlibbioc_1.32.0            
##  [23] pspline_1.0-18              pcaMethods_1.78.0          
##  [25] mvtnorm_1.1-1               htmlwidgets_1.5.4          
##  [27] GlobalOptions_0.1.2         future_1.22.1              
##  [29] UpSetR_1.4.0                laeken_0.5.2               
##  [31] leiden_0.3.3                clustree_0.4.3             
##  [33] lmds_0.1.0                  parallel_3.6.3             
##  [35] scater_1.14.6               irlba_2.3.3                
##  [37] markdown_1.1                DEoptimR_1.0-9             
##  [39] tidygraph_1.1.2             Rcpp_1.0.9                 
##  [41] readr_2.0.2                 KernSmooth_2.23-17         
##  [43] carrier_0.1.0               promises_1.1.0             
##  [45] gdata_2.18.0                DelayedArray_0.12.3        
##  [47] limma_3.42.2                graph_1.64.0               
##  [49] RcppParallel_5.1.4          Hmisc_4.4-0                
##  [51] fs_1.5.2                    RSpectra_0.16-0            
##  [53] fastmatch_1.1-0             ranger_0.12.1              
##  [55] digest_0.6.25               png_0.1-7                  
##  [57] sctransform_0.2.1           cowplot_1.0.0              
##  [59] DOSE_3.12.0                 here_1.0.1                 
##  [61] TInGa_0.0.0.9000            dynplot_1.1.0              
##  [63] ggraph_2.0.3                pkgconfig_2.0.3            
##  [65] GO.db_3.10.0                DelayedMatrixStats_1.8.0   
##  [67] gower_0.2.1                 ggbeeswarm_0.6.0           
##  [69] iterators_1.0.12            DropletUtils_1.6.1         
##  [71] reticulate_1.26             clusterProfiler_3.14.3     
##  [73] SummarizedExperiment_1.16.1 circlize_0.4.15            
##  [75] beeswarm_0.4.0              GetoptLong_1.0.5           
##  [77] xfun_0.35                   bslib_0.3.1                
##  [79] zoo_1.8-10                  tidyselect_1.1.0           
##  [81] GA_3.2                      reshape2_1.4.4             
##  [83] purrr_0.3.4                 ica_1.0-2                  
##  [85] pcaPP_1.9-73                viridisLite_0.3.0          
##  [87] rtracklayer_1.46.0          rlang_1.0.2                
##  [89] hexbin_1.28.1               jquerylib_0.1.4            
##  [91] dyneval_0.9.9               glue_1.4.2                 
##  [93] waldo_0.3.1                 RColorBrewer_1.1-2         
##  [95] matrixStats_0.56.0          stringr_1.4.0              
##  [97] lava_1.6.7                  europepmc_0.3              
##  [99] DESeq2_1.26.0               recipes_0.1.17             
## [101] labeling_0.3                httpuv_1.5.2               
## [103] class_7.3-17                BiocNeighbors_1.4.2        
## [105] DO.db_2.9                   annotate_1.64.0            
## [107] jsonlite_1.7.2              XVector_0.26.0             
## [109] bit_4.0.4                   mime_0.9                   
## [111] aquarius_0.1.5              Rsamtools_2.2.3            
## [113] gridExtra_2.3               gplots_3.0.3               
## [115] stringi_1.4.6               processx_3.5.2             
## [117] gsl_2.1-6                   bitops_1.0-6               
## [119] cli_3.0.1                   batchelor_1.2.4            
## [121] RSQLite_2.2.0               randomForest_4.6-14        
## [123] tidyr_1.1.4                 data.table_1.14.2          
## [125] rstudioapi_0.13             org.Mm.eg.db_3.10.0        
## [127] GenomicAlignments_1.22.1    nlme_3.1-147               
## [129] qvalue_2.18.0               scran_1.14.6               
## [131] locfit_1.5-9.4              scDblFinder_1.1.8          
## [133] listenv_0.8.0               ggthemes_4.2.4             
## [135] gridGraphics_0.5-0          R.oo_1.24.0                
## [137] dbplyr_1.4.4                BiocGenerics_0.32.0        
## [139] TTR_0.24.2                  readxl_1.3.1               
## [141] lifecycle_1.0.1             timeDate_3043.102          
## [143] ggpattern_0.3.1             munsell_0.5.0              
## [145] cellranger_1.1.0            R.methodsS3_1.8.1          
## [147] proxyC_0.1.5                visNetwork_2.0.9           
## [149] caTools_1.18.0              codetools_0.2-16           
## [151] Biobase_2.46.0              GenomeInfoDb_1.22.1        
## [153] vipor_0.4.5                 lmtest_0.9-38              
## [155] msigdbr_7.5.1               htmlTable_1.13.3           
## [157] triebeard_0.3.0             lsei_1.2-0                 
## [159] xtable_1.8-4                ROCR_1.0-7                 
## [161] BiocManager_1.30.10         scatterplot3d_0.3-41       
## [163] abind_1.4-5                 farver_2.0.3               
## [165] parallelly_1.28.1           RANN_2.6.1                 
## [167] askpass_1.1                 GenomicRanges_1.38.0       
## [169] RcppAnnoy_0.0.16            tibble_3.1.5               
## [171] ggdendro_0.1-20             cluster_2.1.0              
## [173] future.apply_1.5.0          Seurat_3.1.5               
## [175] dendextend_1.15.1           Matrix_1.3-2               
## [177] ellipsis_0.3.2              prettyunits_1.1.1          
## [179] lubridate_1.7.9             ggridges_0.5.2             
## [181] igraph_1.2.5                RcppEigen_0.3.3.7.0        
## [183] fgsea_1.12.0                remotes_2.4.2              
## [185] scBFA_1.0.0                 destiny_3.0.1              
## [187] VIM_6.1.1                   testthat_3.1.0             
## [189] htmltools_0.5.2             BiocFileCache_1.10.2       
## [191] yaml_2.2.1                  utf8_1.1.4                 
## [193] plotly_4.9.2.1              XML_3.99-0.3               
## [195] ModelMetrics_1.2.2.2        e1071_1.7-3                
## [197] foreign_0.8-76              withr_2.5.0                
## [199] fitdistrplus_1.0-14         BiocParallel_1.20.1        
## [201] xgboost_1.4.1.1             bit64_4.0.5                
## [203] foreach_1.5.0               robustbase_0.93-9          
## [205] Biostrings_2.54.0           GOSemSim_2.13.1            
## [207] rsvd_1.0.3                  memoise_2.0.0              
## [209] evaluate_0.18               forcats_0.5.0              
## [211] rio_0.5.16                  geneplotter_1.64.0         
## [213] tzdb_0.1.2                  caret_6.0-86               
## [215] ps_1.6.0                    DiagrammeR_1.0.6.1         
## [217] curl_4.3                    fdrtool_1.2.15             
## [219] fansi_0.4.1                 highr_0.8                  
## [221] urltools_1.7.3              xts_0.12.1                 
## [223] GSEABase_1.48.0             acepack_1.4.1              
## [225] edgeR_3.28.1                checkmate_2.0.0            
## [227] scds_1.2.0                  cachem_1.0.6               
## [229] npsurv_0.4-0                babelgene_22.3             
## [231] rjson_0.2.20                openxlsx_4.1.5             
## [233] ggrepel_0.9.1               clue_0.3-60                
## [235] rprojroot_2.0.2             stabledist_0.7-1           
## [237] tools_3.6.3                 sass_0.4.0                 
## [239] nichenetr_1.1.1             magrittr_2.0.1             
## [241] RCurl_1.98-1.2              proxy_0.4-24               
## [243] car_3.0-11                  ape_5.3                    
## [245] ggplotify_0.0.5             xml2_1.3.2                 
## [247] httr_1.4.2                  assertthat_0.2.1           
## [249] rmarkdown_2.18              boot_1.3-25                
## [251] globals_0.14.0              R6_2.4.1                   
## [253] Rhdf5lib_1.8.0              nnet_7.3-14                
## [255] RcppHNSW_0.2.0              progress_1.2.2             
## [257] genefilter_1.68.0           statmod_1.4.34             
## [259] gtools_3.8.2                shape_1.4.6                
## [261] HDF5Array_1.14.4            BiocSingular_1.2.2         
## [263] rhdf5_2.30.1                splines_3.6.3              
## [265] AUCell_1.8.0                carData_3.0-4              
## [267] colorspace_1.4-1            generics_0.1.0             
## [269] stats4_3.6.3                base64enc_0.1-3            
## [271] dynfeature_1.0.0            smoother_1.1               
## [273] gridtext_0.1.1              pillar_1.6.3               
## [275] tweenr_1.0.1                sp_1.4-1                   
## [277] ggplot.multistats_1.0.0     rvcheck_0.1.8              
## [279] GenomeInfoDbData_1.2.2      plyr_1.8.6                 
## [281] gtable_0.3.0                zip_2.2.0                  
## [283] knitr_1.41                  latticeExtra_0.6-29        
## [285] biomaRt_2.42.1              IRanges_2.20.2             
## [287] fastmap_1.1.0               ADGofTest_0.3              
## [289] copula_1.0-0                doParallel_1.0.15          
## [291] AnnotationDbi_1.48.0        vcd_1.4-8                  
## [293] babelwhale_1.0.1            openssl_1.4.1              
## [295] scales_1.1.1                backports_1.2.1            
## [297] S4Vectors_0.24.4            ipred_0.9-12               
## [299] enrichplot_1.6.1            hms_1.1.1                  
## [301] ggforce_0.3.1               Rtsne_0.15                 
## [303] shiny_1.7.1                 numDeriv_2016.8-1.1        
## [305] polyclip_1.10-0             lazyeval_0.2.2             
## [307] Formula_1.2-3               tsne_0.1-3                 
## [309] crayon_1.3.4                MASS_7.3-54                
## [311] pROC_1.16.2                 viridis_0.5.1              
## [313] dynparam_1.0.0              rpart_4.1-15               
## [315] zinbwave_1.8.0              compiler_3.6.3             
## [317] ggtext_0.1.0
---
title: "HS project"
subtitle: "Figures"
author: "Audrey"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output:
  html_document:
    code_folding: show
    code_download: true
    toc: true
    toc_float: true
    number_sections: false
---

<style>
body {
text-align: justify}
</style>

<!-- Automatically computes and prints in the output the running time for any code chunk -->
```{r, echo=FALSE}
# https://github.com/rstudio/rmarkdown/issues/1453
hooks = knitr::knit_hooks$get()
hook_foldable = function(type) {
  force(type)
  function(x, options) {
    res = hooks[[type]](x, options)
    
    if (isFALSE(options[[paste0("fold_", type)]])) return(res)
    
    paste0(
      "<details><summary>", "show", "</summary>\n\n",
      res,
      "\n\n</details>"
    )
  }
}
knitr::knit_hooks$set(
  output = hook_foldable("output"),
  plot = hook_foldable("plot"),
  time_it = local({
    now = NULL
    function(before, options) {
      if (options$time_it) {
        if (before) {
          now <= Sys.time()
        } else {
          res = difftime(Sys.time(), now, units = "secs")
          paste("(Time to run :", round(res, digits = 2), "s)")
        }
      }
    }
  })
)
```

<!-- Set default parameters for all chunks -->
```{r, setup, include = FALSE}
set.seed(1337L)
knitr::opts_chunk$set(echo = TRUE, # display code
                      # display chunk output
                      message = FALSE,
                      warning = FALSE,
                      fold_output = FALSE, # useful for sessionInfo()
                      fold_plot = FALSE,
                      
                      # figure settings
                      fig.align = 'center',
                      fig.width = 20,
                      fig.height = 15,
                      
                      # something about seed, chunk and Rmarkdown compilation
                      # https://stackoverflow.com/questions/39417003/long-vectors-not-supported-yet-error-in-rmd-but-not-in-r-script
                      # cache = TRUE,
                      cache.lazy = FALSE, 
                      
                      # add runtime after chunk
                      time_it = FALSE,
                      
                      # save figures in PDF in a separate folder
                      dev = c('png', 'pdf'), # tiff or pdf alone renders bad in html
                      # dpi = 300,
                      fig.path = "figures_detail/",
                      pdf.options(encoding = "ISOLatin9.enc"))
```


This file is used to prepare the figures for the paper.

```{r library}
library(dplyr)
library(patchwork)
library(ggplot2)
library(ComplexHeatmap)

.libPaths()
```


# Preparation

Here are the folders where analyses are stored :

```{r locations}
data_dir = "./.."
list.files(data_dir)
```

These are the custom colors for cell populations :

```{r color_markers, fig.width = 10, fig.height = 1.2}
color_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_color_markers.rds"))
color_markers = color_markers[names(color_markers) != "melanocytes"] # remove melanocytes

data.frame(cell_type = names(color_markers),
           color = unlist(color_markers)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank(),
                 axis.text.x = element_text(angle = 30, hjust = 1))
```

We define custom colors for sample type :

```{r sample_type_colors, fig.width = 3, fig.height = 0.75}
sample_type_colors = setNames(nm = c("HS", "HD"),
                              c("#C55F40", "#2C78E6"))

data.frame(cell_type = names(sample_type_colors),
           color = unlist(sample_type_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(sample_type_colors), breaks = names(sample_type_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())
```

We define custom colors for laboratory :

```{r laboratory_colors, fig.width = 3, fig.height = 0.75}
laboratory_colors = setNames(nm = c("Our", "Wu", "Takahashi"),
                             c("firebrick3", "deepskyblue", "mediumpurple"))

data.frame(cell_type = names(laboratory_colors),
           color = unlist(laboratory_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(laboratory_colors), breaks = names(laboratory_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())
```

We define custom colors for location :

```{r location_colors, fig.width = 3, fig.height = 0.75}
location_colors = setNames(nm = c("axillary", "hair scalp", "pubis"),
                           c("forestgreen", "orchid3", "darkorange1"))

data.frame(cell_type = names(location_colors),
           color = unlist(location_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(location_colors), breaks = names(location_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())
```

We define custom colors for gender :

```{r gender_colors, fig.width = 3, fig.height = 0.75}
gender_colors = setNames(nm = c("F", "M"),
                           c("lightslateblue", "chartreuse3"))

data.frame(cell_type = names(gender_colors),
           color = unlist(gender_colors)) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
  ggplot2::geom_point(pch = 21, size = 5) +
  ggplot2::scale_fill_manual(values = unlist(gender_colors), breaks = names(gender_colors)) +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.position = "none",
                 axis.line = element_blank(),
                 axis.title = element_blank(),
                 axis.ticks = element_blank(),
                 axis.text.y = element_blank())
```

We set a background color :

```{r bg_color}
bg_color = "gray94"
```


This is the correspondence between cell types and cell families, and custom colors to color cells by cell category :

```{r cell_category}
custom_order_cell_type = data.frame(
  cell_type = names(color_markers),
  cell_category = c(rep("immune cells", 5),
                    rep("matrix", 5),
                    rep("non matrix", 5)),
  stringsAsFactors = FALSE)
custom_order_cell_type$cell_type = factor(custom_order_cell_type$cell_type,
                                          levels = custom_order_cell_type$cell_type)
rownames(custom_order_cell_type) = custom_order_cell_type$cell_type
custom_order_cell_type

category_color = c("immune cells" = "slateblue1",
                   "matrix" = "mediumseagreen",
                   "non matrix" = "firebrick3")
```

We define markers to display on a dotplot to assess cell type annotation :

```{r dotplot_markers}
dotplot_markers = c("PTPRC",                # immune cells
                    "CD3E", "CD4",          # CD4+ T cells
                    "CD3E", "CD8A",         # CD8+ T cells
                    "CD207", "AIF1",        # Langerhans cells
                    "TREM2", "MSR1",        # macrophages
                    "CD79A", "CD79B",       # B cells
                    "MSX2",                 # matrix cells
                    "KRT32", "KRT35",       # cuticle
                    "KRT31", "PRR9",        # cortex
                    "BAMBI", "ALDH1A3",     # medulla
                    "KRT71", "KRT73",       # IRS
                    "TOP2A", "MCM5", "TK1", # cycling cells
                    "KRT14", "CXCL14",      # non-matrix cells
                    "DIO2", "TCEAL2",       # HF-SCs
                    "KRT15", "COL17A1",     # IFE basal
                    "SPINK5", "KRT1",       # ORS
                    "KRT16", "KRT6C",       # IFE granular or spinous
                    "CLMP", "PPARG")        # sebocytes
```

We copy-paste a subset of the markers associated with cell type annotation :

```{r cell_markers}
cell_markers = list(
  #  Immune cells
  "CD4 T cells" = c("CD3D", "CD3G", "CD3E", "CD52", "IL32",
                    "TRBC1", "CD4", "COTL1", "CD40LG", "GPR183", "TNFRSF1B", "FYB1", "LAT"),
  "CD8 T cells" = c("CD3D", "CD3G", "CD3E", "CD52", "IL32",
                    "CTSW", "NCR3", "CD8A", "NKG7", "KLRC1", "TRGC2", "CCL5", "ZNF683"),
  "Langerhans cells" = c("CD207", "LST1", "AIF1", "CPVL", "C15orf48", "CD1A", "CD52", "CD1E", "CD1C"),
  "macrophages" = c("LST1", "AIF1", "C3", "OLFML3", "TREM2", "A2M",
                    "MSR1", "FCGR3A", "VSIG4", "AP003481.1"),
  "B cells" = c("CD79A", "IGHG2", "IGHG3", "IGLC3", "IGHG1", "IGHM", "MS4A1", "IGLC2",
                "IGHGP", "BANK1", "IGHG4", "IGKC", "TNFRSF13C", "CD79B", "KLF2", "GZMB"),
  # Matrix
  "cuticle" = c("KRT32", "CYSTM1", "MT4", "KRT35", "VSNL1", "NOTCH1"),
  "cortex" = c("C10orf99", "CRYAB", "HEPHL1", "ALOX15B", "EFHD1", "DSG4", "DNASE1L2", "TGM3", "KRT31"),
  "medulla" = c("BAMBI", "SLC7A8", "EDNRA", "IGFBP2", "KRT25", "KITLG"),
  "IRS" = c("KRT71", "KRT27", "KRT28", "CTSC", "GATA3", "KRT73", "SCEL"),
  "proliferative" = c("TOP2A", "MKI67", "BIRC5", "TK1", "MCM5"),
  # Non-matrix
  "HF-SCs" = c("KRT15", "DST", "DIO2", "TCEAL2", "SFRP1", "TNC", "WIF1", "FRZB", "LHX2", "COL17A1"),
  "IFE basal" = c("GPX2", "MOXD1", "C19orf48", "ALDH3A1", "CDH13", "LAMB3", "CAVIN1"),
  "IFE granular spinous" = c("DEFB1", "LY6D", "EHF", "PDZK1IP1", "S100A7", "FGFBP1", "SPINK5", "KRT1", "KRTDAP"),
  "ORS" = c("KRT16", "KRT6B", "KRT6C", "TM4SF1", "APOC1", "CBLN2", "HES4", "LYPD3"),
  # other
  "melanocytes" = c("DCT", "KIT", "MLANA", "GPM6B", "EDNRB", "PMEL", "IGFBP7", "MITF", "ZEB2", "APOD"),
  "sebocytes" = c("CLMP", "PPARG", "ACSBG1", "MGST1", "SAA1", "APMAP"))

lengths(cell_markers)
```

Custom functions to display gene expression on the heatmap :

```{r color_fun}
color_fun = function(one_gene) {
  gene_range = range(ht_annot[, one_gene])
  gene_palette = circlize::colorRamp2(colors = c("#FFFFFF", aquarius::color_gene[-1]),
                                      breaks = seq(from = gene_range[1], to = gene_range[2],
                                                   length.out = length(aquarius::color_gene)))
  return(gene_palette)
}
```

# Load datasets

In a list, we load:

* Seurat object
* name of the 2D projection to visualize cells on
* various third element such as: sample information, trajectory, results from gene analysis...

```{r list_data}
list_data = list()

## Our dataset
# Atlas of all cells
list_data$our_atlas$sobj = readRDS(paste0(data_dir, "/3_combined/hs_hd_sobj.rds"))
list_data$our_atlas$name2D = "harmony_38_tsne"
list_data$our_atlas$sample_info = readRDS(paste0(data_dir, "/1_metadata/hs_hd_sample_info.rds"))
list_data$our_atlas$sobj

# ORS + IFEb dataset
list_data$ors_ifeb$sobj = readRDS(paste0(data_dir, "/4_zoom/3_zoom_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$ors_ifeb$name2D = "harmony_20_tsne"
list_data$ors_ifeb$list_results = readRDS(paste0(data_dir, "/4_zoom/3_zoom_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$ors_ifeb$sobj

# HF-SCs dataset
list_data$hfsc$sobj = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_sobj.rds"))
list_data$hfsc$name2D = "harmony_24_tsne"
list_data$hfsc$list_results = readRDS(paste0(data_dir, "/4_zoom/2_zoom_hfsc/hfsc_list_results.rds"))
list_data$hfsc$sobj

# Non matrix cells dataset
list_data$non_matrix$sobj = readRDS(paste0(data_dir, "/4_zoom/5_zoom_non_matrix/non_matrix_sobj_traj_tinga.rds"))
list_data$non_matrix$name2D = "harmony_dm"
list_data$non_matrix$my_traj = readRDS(paste0(data_dir, "/4_zoom/5_zoom_non_matrix/non_matrix_my_traj_tinga.rds"))
list_data$non_matrix$sobj

# Matrix cells dataset (not considered in the manuscript)
# list_data$matrix$sobj = readRDS(paste0(data_dir, "/4_zoom/6_zoom_matrix/matrix_sobj.rds"))
# list_data$matrix$name2D = "harmony_19_tsne"
# list_data$matrix$list_results = readRDS(paste0(data_dir, "/4_zoom/6_zoom_matrix/matrix_list_results.rds"))
# list_data$matrix$sobj

# Immune cells
list_data$immune$sobj = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_sobj.rds"))
list_data$immune$name2D = "harmony_20_tsne"
list_data$immune$list_results = readRDS(paste0(data_dir, "/4_zoom/1_zoom_immune/immune_cells_list_results.rds"))
list_data$immune$sobj

## Wu dataset (OEP002321)
# Atlas of all cells
list_data$wu_atlas$sobj = readRDS(paste0(data_dir, "/5_wu/3_combined/wu_sobj.rds"))
list_data$wu_atlas$name2D = "harmony_39_tsne"
list_data$wu_atlas$sample_info = readRDS(paste0(data_dir, "/5_wu/1_metadata/wu_sample_info.rds"))

# ORS + IFEb dataset
list_data$wu_ors_ifeb$sobj = readRDS(paste0(data_dir, "/5_wu/4_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$wu_ors_ifeb$name2D = "harmony_20_tsne"
list_data$wu_ors_ifeb$list_results = readRDS(paste0(data_dir, "/5_wu/4_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$wu_ors_ifeb$sobj

## Takahashi dataset (GSE129611)
# Atlas of all cells
list_data$takahashi_atlas$sobj = readRDS(paste0(data_dir, "/6_takahashi/3_combined/takahashi_sobj.rds"))
list_data$takahashi_atlas$name2D = "harmony_41_tsne"
list_data$takahashi_atlas$sample_info = readRDS(paste0(data_dir, "/6_takahashi/1_metadata/takahashi_sample_info.rds"))
list_data$takahashi_atlas$sobj

# ORS + IFEb dataset
list_data$takahashi_ors_ifeb$sobj = readRDS(paste0(data_dir, "/6_takahashi/4_ors_ifeb/ors_ifeb_sobj.rds"))
list_data$takahashi_ors_ifeb$name2D = "harmony_23_tsne"
list_data$takahashi_ors_ifeb$list_results = readRDS(paste0(data_dir, "/6_takahashi/4_ors_ifeb/ors_ifeb_list_results.rds"))
list_data$takahashi_ors_ifeb$sobj

## Three datasets combined (Our + OEP002321 + GSE129611)
list_data$combined$sobj = readRDS(paste0(data_dir, "/3_combined/data3_sobj.rds"))
list_data$combined$name2D = "harmony_37_tsne"
list_data$combined$sobj

## Print
str(list_data, max.level = 2)
```


# Figures

## Sample information

Our dataset:

```{r sample_info_our, fig.width = 12, fig.height = 3}
sobj = list_data$our_atlas$sobj
sample_info = list_data$our_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
```

Wu dataset:

```{r sample_info_wu, fig.width = 12, fig.height = 3}
sobj = list_data$wu_atlas$sobj
sample_info = list_data$wu_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
```

Takahashi dataset:

```{r sample_info_takahashi, fig.width = 12, fig.height = 3}
sobj = list_data$takahashi_atlas$sobj
sample_info = list_data$takahashi_atlas$sample_info

# Nb cells by dataset
to_plot = table(sobj$sample_identifier) %>%
  as.data.frame.table(., stringsAsFactors = FALSE) %>%
  `colnames<-`(c("sample_identifier", "nb_cells")) %>%
  dplyr::left_join(x = ., y = sample_info, by = "sample_identifier") 

# patchwork
plot_list = aquarius::fig_plot_gb(to_plot, title = "Available datasets")
patchwork::wrap_plots(plot_list) +
  patchwork::plot_layout(design = "A\nB", heights = c(0.1,5)) &
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 15))
```

## Annotation smoothing

For each dataset, we smooth the single-cell level cell type annotation at a cluster level.

* Our dataset:

```{r smoothing_atlas_our}
# Select dataset
sobj = list_data$our_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$our_atlas$sobj = sobj
```

```{r smoothing_non_matrix_our}
# Select dataset
sobj = list_data$non_matrix$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$non_matrix$sobj = sobj
```

```{r smoothing_immune_our}
# Select dataset
sobj = list_data$immune$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$immune$sobj = sobj
```

* Wu dataset:

```{r smoothing_wu}
# Select dataset
sobj = list_data$wu_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$wu_atlas$sobj = sobj
```

* Takahashi dataset:

```{r smoothing_takahashi}
# Select dataset
sobj = list_data$takahashi_atlas$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$takahashi_atlas$sobj = sobj
```

* Combined dataset:

```{r smoothing_combined}
# Select dataset
sobj = list_data$combined$sobj

# Annotation smoothing
sobj$cell_type = sobj$cell_type %>%
  as.character() %>%
  factor(., levels = names(color_markers))

cluster_type = table(sobj$cell_type, sobj$seurat_clusters) %>%
  prop.table(., margin = 2) %>%
  apply(., 2, which.max)
cluster_type = setNames(nm = names(cluster_type),
                        levels(sobj$cell_type)[cluster_type])

sobj$cluster_type = cluster_type[sobj$seurat_clusters]
sobj$cluster_type = factor(sobj$cluster_type,
                           levels = levels(sobj$cell_type))

# Consider change
list_data$combined$sobj = sobj
```

For the combined dataset, we also define the cell category:

```{r cell_category_combined}
# Select dataset
sobj = list_data$combined$sobj

# Define cell category
sobj$cell_category = custom_order_cell_type[sobj$cell_type, "cell_category"]
table(sobj$cell_type, sobj$cell_category)

# Consider change
list_data$combined$sobj = sobj
```


## Atlas

### Our dataset

We select the dataset:

```{r our_dataset}
sobj = list_data$our_atlas$sobj
name2D = list_data$our_atlas$name2D
sample_info = list_data$our_atlas$sample_info
```

* sample identifier

```{r sample_id_our, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord$location = sobj$location
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* location

```{r location_our, fig.width = 4, fig.height = 4}
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = location)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = location_colors,
                              breaks = names(location_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* cell type annotation

```{r cell_type_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* dotplot for cell type annotation

```{r dotplot_our, fig.width = 8, fig.height = 8.5}
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list
```

* barplot with proportion of cells for cell types of interest, by sample ID

```{r barplot_atlas, fig.width = 7, fig.height = 5}
# Cells of interest
cells_oi = c("CD4 T cells", "macrophages")

# Proportion of cells by sample ID
df_proportion = table(sobj$cluster_type,
                      sobj$sample_identifier) %>%
  prop.table(., margin = 2) %>%
  as.data.frame.table() %>%
  `colnames<-`(c("cluster_type", "sample_identifier", "freq")) %>%
  dplyr::filter(cluster_type %in% cells_oi) %>%
  dplyr::mutate(cluster_type = factor(cluster_type, levels = cells_oi)) %>%
  dplyr::mutate(freq = 100*freq) # percentage

# Plot
ggplot2::ggplot(df_proportion, aes(x = sample_identifier,
                                   y = freq,
                                   fill = cluster_type)) +
  ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(),
                    color = "black", width = 0.7) +
  ggplot2::labs(y = "% of cells by sample") +
  ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0,14)) +
  ggplot2::scale_fill_manual(values = color_markers[cells_oi],
                             breaks = cells_oi,
                             name = "Cell Type") +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.title.x = element_blank(),
                 axis.line.x = element_line(color = "gray50"),
                 panel.grid.major.y = element_line(),
                 panel.grid.minor.y = element_line(),
                 text = element_text(size = 15))
```

### Wu dataset

We select the dataset:

```{r wu_dataset}
sobj = list_data$wu_atlas$sobj
name2D = list_data$wu_atlas$name2D
sample_info = list_data$wu_atlas$sample_info
```

* sample identifier

```{r sample_id_wu, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* cell type annotation

```{r cell_type_wu, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_wu, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* dotplot for cell type annotation

```{r dotplot_wu, fig.width = 8, fig.height = 8.5}
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list
```

### Takahashi dataset

We select the dataset:

```{r takahashi_dataset}
sobj = list_data$takahashi_atlas$sobj
name2D = list_data$takahashi_atlas$name2D
sample_info = list_data$takahashi_atlas$sample_info
```

* sample identifier

```{r sample_id_takahashi, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* cell type annotation

```{r cell_type_takahashi, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_takahashi, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* dotplot for cell type annotation

```{r dotplot_takahashi, fig.width = 8, fig.height = 8.5}
plot_list = aquarius::plot_dotplot(sobj,
                                   markers = dotplot_markers,
                                   assay = "RNA", column_name = "cell_type", nb_hline = 0) +
  ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
  ggplot2::theme(legend.position = "left",
                 legend.justification = "bottom",
                 legend.box = "vertical",
                 legend.box.margin = margin(0,70,0,0),
                 axis.title = element_blank(),
                 axis.ticks.x = element_blank(),
                 axis.text.x = element_blank(),
                 axis.line.x = element_blank(),
                 plot.margin = unit(rep(0, 4), "cm"))

p = data.frame(cell_type = factor(levels(sobj$cell_type),
                                  levels = levels(sobj$cell_type))) %>%
  ggplot2::ggplot(., aes(x = cell_type, y = 0)) +
  ggplot2::geom_point(size = 0) +
  ggplot2::geom_segment(aes(x = 0.5, xend = 5.5, y = 0, yend = 0), size = 6, col = category_color["immune cells"]) +
  ggplot2::geom_segment(aes(x = 5.5, xend = 10.5, y = 0, yend = 0), size = 6, col = category_color["matrix"]) +
  ggplot2::geom_segment(aes(x = 10.5, xend = 15.5, y = 0, yend = 0), size = 6, col = category_color["non matrix"]) +
  ggplot2::scale_y_continuous(expand = c(0,0), limits = c(0,0)) +
  ggplot2::theme_classic() +
  ggplot2::theme(axis.text.y = element_blank(),
                 axis.ticks.y = element_blank(),
                 axis.title = element_blank(),
                 axis.line.y = element_blank(),
                 axis.text.x = element_text(angle = 45, hjust = 1, size = 10, color = "black"),
                 plot.margin = unit(c(0,0.5,0.5,0), "cm"))

plot_list = patchwork::wrap_plots(plot_list, p,
                                  ncol = 1, heights = c(25, 1))
plot_list
```

### Combined datasets

We select the dataset:

```{r combined_dataset}
sobj = list_data$combined$sobj
name2D = list_data$combined$name2D
sample_info = rbind(list_data$our_atlas$sample_info,
                    list_data$wu_atlas$sample_info,
                    list_data$takahashi_atlas$sample_info)
```

* sample identifier

```{r sample_id_combined, fig.width = 4, fig.height = 4}
# Random order
set.seed(1234)
rnd_order = sample(colnames(sobj), replace = FALSE, size = ncol(sobj))

# Extract coordinates
cells_coord = sobj@reductions[[name2D]]@cell.embeddings %>%
  as.data.frame() %>%
  `colnames<-`(c("Dim1", "Dim2"))
cells_coord$sample_identifier = sobj$sample_identifier
cells_coord$location = sobj$location
cells_coord$laboratory = sobj$laboratory
cells_coord$gender = dplyr::left_join(sobj@meta.data,
                                      sample_info,
                                      by = "project_name")[, "gender"]
cells_coord = cells_coord[(rnd_order), ]

# Plot
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = sample_identifier)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = sample_info$color,
                              breaks = sample_info$sample_identifier) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* location

```{r location_combined, fig.width = 4, fig.height = 4}
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = location)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = location_colors,
                              breaks = names(location_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* laboratory

```{r laboratory_combined, fig.width = 4, fig.height = 4}
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = laboratory)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = laboratory_colors,
                              breaks = names(laboratory_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* gender

```{r gender_combined, fig.width = 4, fig.height = 4}
ggplot2::ggplot(cells_coord, aes(x = Dim1, y = Dim2, col = gender)) +
  ggplot2::geom_point(size = 0.5) +
  ggplot2::scale_color_manual(values = gender_colors,
                              breaks = names(gender_colors)) +
  ggplot2::theme_void() +
  ggplot2::theme(aspect.ratio = 1,
                 legend.position = "none")
```

* cell type annotation

```{r cell_type_combined, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_combined, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* split by laboratory:

```{r combined_split, fig.width = 12, fig.height = 5}
plot_list = aquarius::plot_split_dimred(sobj,
                                        reduction = name2D,
                                        split_by = "laboratory",
                                        group_by = "cluster_type",
                                        group_color = color_markers,
                                        order = TRUE,
                                        bg_color = bg_color)

patchwork::wrap_plots(plot_list, nrow = 1) +
  patchwork::plot_layout(guides = "collect") &
  Seurat::NoLegend()
```

* gene to assess global annotation

```{r gene_global_combined, fig.width = 4, fig.height = 4}
plot_list = lapply(c("PTPRC", "MSX2", "KRT14"), FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene, reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

plot_list
```

* violin plot, split by cell type, split by laboratory

for non matrix cells

```{r violin_non_matrix_combined, fig.width = 5, fig.height = 10}
category_of_interest = "non matrix"

# Select cells of interest
cell_type_of_interest = as.character(custom_order_cell_type[
  which(custom_order_cell_type$cell_category == category_of_interest), "cell_type"])

subsobj_of_interest = subset(sobj, cell_type %in% cell_type_of_interest)
subsobj_of_interest$cell_type_labo = paste0(subsobj_of_interest$cell_type,
                                            subsobj_of_interest$laboratory)

cell_markers_of_interest = cell_markers[cell_type_of_interest]

total_height = max(lengths(cell_markers_of_interest)) + 1

# Violin plot
patchwork_list = lapply(names(cell_markers_of_interest), FUN = function(cell_type) {
  markers_set = cell_markers_of_interest[[cell_type]]
  
  # Individual violin plot
  sub_plot_list = lapply(markers_set, FUN = function(one_gene) {
    p = Seurat::VlnPlot(subsobj_of_interest,
                        features = one_gene, pt.size = 0,
                        group.by = "cell_type",
                        split.by = "laboratory",
                        cols = laboratory_colors,
                        combine = FALSE)[[1]] +
      ggplot2::labs(y = one_gene) +
      ggplot2::scale_y_continuous(expand = c(0.01, 0)) +
      ggplot2::geom_vline(mapping = NULL, data = NULL,
                          xintercept = c(1:4) + 0.5,
                          col = "gray50", lty = 2) +
      Seurat::NoLegend() +
      ggplot2::theme(plot.title = element_blank(),
                     axis.title.x = element_blank(), # "Identity"
                     axis.line.x = element_blank(),  # replaced by panel.border
                     axis.title.y = element_text(size = 8, angle = 0, vjust = 0.5), # gene
                     axis.text.y = element_blank(), # "Expression level"
                     axis.text.x = element_blank(), # cell type
                     axis.ticks = element_blank(),
                     plot.margin = unit(c(0,0,0,0), "cm"),
                     panel.border = element_rect(size = 1, color = "gray50"))
    return(p)
  })
  
  # Reset axis.text for last plot
  # sub_plot_list[[length(sub_plot_list)]] = sub_plot_list[[length(sub_plot_list)]] +
  #   ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) # cell type
  
  # Add empty plot to arrange heights
  sub_plot_list[[length(sub_plot_list) + 1]] = patchwork::plot_spacer()
  empty_plot_height = total_height - length(markers_set)
  
  # Arrange as patchwork
  pp = patchwork::wrap_plots(sub_plot_list,
                             ncol = 1,
                             heights = c(rep(1, length(markers_set)),
                                         empty_plot_height)) +
    patchwork::plot_annotation(title = cell_type)
  
  return(pp)
})
names(patchwork_list) = names(cell_markers_of_interest)

patchwork::wrap_plots(patchwork_list, ncol = 1)
```

for matrix cells

```{r violin_matrix_combined, fig.width = 5, fig.height = 11}
category_of_interest = "matrix"

# Select cells of interest
cell_type_of_interest = as.character(custom_order_cell_type[
  which(custom_order_cell_type$cell_category == category_of_interest), "cell_type"])

subsobj_of_interest = subset(sobj, cell_type %in% cell_type_of_interest)
subsobj_of_interest$cell_type_labo = paste0(subsobj_of_interest$cell_type,
                                            subsobj_of_interest$laboratory)

cell_markers_of_interest = cell_markers[cell_type_of_interest]

total_height = max(lengths(cell_markers_of_interest)) + 1

# Violin plot
patchwork_list = lapply(names(cell_markers_of_interest), FUN = function(cell_type) {
  markers_set = cell_markers_of_interest[[cell_type]]
  
  # Individual violin plot
  sub_plot_list = lapply(markers_set, FUN = function(one_gene) {
    p = Seurat::VlnPlot(subsobj_of_interest,
                        features = one_gene, pt.size = 0,
                        group.by = "cell_type",
                        split.by = "laboratory",
                        cols = laboratory_colors,
                        combine = FALSE)[[1]] +
      ggplot2::labs(y = one_gene) +
      ggplot2::scale_y_continuous(expand = c(0.01, 0)) +
      ggplot2::geom_vline(mapping = NULL, data = NULL,
                          xintercept = c(1:4) + 0.5,
                          col = "gray50", lty = 2) +
      Seurat::NoLegend() +
      ggplot2::theme(plot.title = element_blank(),
                     axis.title.x = element_blank(), # "Identity"
                     axis.line.x = element_blank(),  # replaced by panel.border
                     axis.title.y = element_text(size = 8, angle = 0, vjust = 0.5), # gene
                     axis.text.y = element_blank(), # "Expression level"
                     axis.text.x = element_blank(), # cell type
                     axis.ticks = element_blank(),
                     plot.margin = unit(c(0,0,0,0), "cm"),
                     panel.border = element_rect(size = 1, color = "gray50"))
    return(p)
  })
  
  # Reset axis.text for last plot
  # sub_plot_list[[length(sub_plot_list)]] = sub_plot_list[[length(sub_plot_list)]] +
  #   ggplot2::theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) # cell type
  
  # Add empty plot to arrange heights
  sub_plot_list[[length(sub_plot_list) + 1]] = patchwork::plot_spacer()
  empty_plot_height = total_height - length(markers_set)
  
  # Arrange as patchwork
  pp = patchwork::wrap_plots(sub_plot_list,
                             ncol = 1,
                             heights = c(rep(1, length(markers_set)),
                                         empty_plot_height)) +
    patchwork::plot_annotation(title = cell_type)
  
  return(pp)
})
names(patchwork_list) = names(cell_markers_of_interest)

patchwork::wrap_plots(patchwork_list, ncol = 1)
```

## Non-matrix cells

We select the datasets:

```{r non_matrix_dataset_our}
sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$non_matrix$sobj
name2D = list_data$non_matrix$name2D
my_traj = list_data$non_matrix$my_traj

cell_type_of_interest = c("HF-SCs", "ORS", "IFE basal", "IFE granular spinous")
```

Location of cells on the whole atlas:

```{r non_matrix_location_our, fig.width = 2, fig.height = 2}
sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cluster_type")],
                                             by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

* sample type

```{r sample_type_non_matrix_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "sample_type", cols = sample_type_colors) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cell type annotation

```{r cell_type_non_matrix_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_non_matrix_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* pseudotime

```{r pseudotime_non_matrix_our, fig.width = 4, fig.height = 5}
Seurat::FeaturePlot(sobj, reduction = name2D, pt.size = 0.5,
                    features = "pseudotime") +
  ggplot2::scale_color_gradientn(colors = viridis::viridis(n = 100)) +
  ggplot2::lims(x = range(sobj@reductions[[name2D]]@cell.embeddings[, 1]),
                y = range(sobj@reductions[[name2D]]@cell.embeddings[, 2])) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank(),
                 legend.position = "bottom",
                 legend.direction = "horizontal") +
  Seurat::NoAxes()
```

* pseudotime with dynplot's function :

```{r pseudotime2_non_matrix_our, fig.width = 4, fig.height = 4}
dynplot::plot_dimred(trajectory = my_traj,
                     dimred = sobj[[name2D]]@cell.embeddings,
                     # Cells
                     color_cells = 'pseudotime',
                     size_cells = 0.5,
                     border_radius_percentage = 0,
                     # Trajectory
                     plot_trajectory = TRUE,
                     color_trajectory = "none",
                     label_milestones = FALSE,
                     size_milestones = 0,
                     size_transitions = 1)
```

* violin plots

```{r non_matrix_common_genes_vln, fig.width = 2.5, fig.height = 4}
features_oi = c("SOX9", "TCF4", "TBX1", "NFATC1", 
                "LHX2", "RUNX1", "VIM", "TCF3", "FGF18", "CD34",
                # HF-SCs + IFE basal
                "COL17A1", "MT2A", "TXNIP", "CAVIN1",
                # HF-SCs + ORS
                "TUBB2A", "KRT6B", "MARCKSL1", "ID4", "CYP1B1", "BARX2",
                # HF-SCs
                "LMCD1", "TCEAL2", "FRZB", "THBS1", "DIO2")

plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::VlnPlot(sobj, group.by = "cluster_type",
                      features = one_gene, pt.size = 0.001) +
    ggplot2::scale_fill_manual(values = color_markers[levels(sobj$cluster_type)],
                               breaks = levels(sobj$cluster_type)) +
    ggplot2::theme(plot.subtitle = element_text(hjust = 0.5),
                   axis.title.x = element_blank(),
                   legend.position = "none")
  return(p)
})

names(plot_list) = features_oi
plot_list
```

* feature plots

```{r non_matrix_common_genes_feature, fig.width = 4, fig.height = 4}
plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene, reduction = name2D) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::lims(x = range(sobj@reductions[[name2D]]@cell.embeddings[, 1]),
                  y = range(sobj@reductions[[name2D]]@cell.embeddings[, 2])) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

names(plot_list) = features_oi
plot_list
```

## ORS vs IFE basal

### Our dataset

We select the dataset:

```{r ors_ifeb_our}
sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$ors_ifeb$sobj
name2D = list_data$ors_ifeb$name2D
list_results = list_data$ors_ifeb$list_results
```

Location of cells on the whole atlas:

```{r ors_ifeb_location_our, fig.width = 2, fig.height = 2}
sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

DE genes between ORS and IFE basal :

```{r de_ors_ifeb_our, fig.width = 6, fig.height = 6}
mark = list_results$ORS_vs_IFE_basal$mark
mark$gene_name = rownames(mark)
mark_label = rbind(
  # up-regulated in ORS
  mark %>% dplyr::top_n(., n = 20, wt = avg_logFC),
  # up-regulated in IFE basal
  mark %>% dplyr::top_n(., n = 20, wt = -avg_logFC),
  # representative and selective for ORS
  mark %>% dplyr::top_n(., n = 20, wt = (pct.1 - pct.2)),
  # representative and selective for IFE basal
  mark %>% dplyr::top_n(., n = 20, wt = -(pct.1 - pct.2))) %>%
  dplyr::distinct()
mark_label = mark_label[!grepl(rownames(mark_label), pattern = "^MT"), ]

avg_logFC_range = setNames(c(min(mark_label$avg_logFC), -1, 0, 1, max(mark_label$avg_logFC)),
                           nm = c("dodgerblue4", "dodgerblue3", "#B7B7B7", "firebrick3", "firebrick4"))


ggplot2::ggplot(mark, aes(x = pct.1, y = pct.2, col = avg_logFC)) +
  ggplot2::geom_abline(slope = 1, intercept = 0, lty = 2) +
  ggplot2::geom_point() +
  ggrepel::geom_label_repel(data = mark_label, max.overlaps = Inf,
                            aes(x = pct.1, y = pct.2, label = gene_name),
                            col = "black", fill = NA, size = 3.5, label.size = NA) +
  ggplot2::labs(x = "Enriched in ORS",
                y = "Enriched in IFE basal") +
  ggplot2::scale_color_gradientn(colors = names(avg_logFC_range),
                                 values = scales::rescale(unname(avg_logFC_range))) +
  ggplot2::theme_classic() +
  ggplot2::theme(aspect.ratio = 1)
```

* GSEA, enriched in ORS compared to IFE basal

```{r kera_ors_ifeb_our, fig.width = 6, fig.height = 4}
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

* GSEA, enriched in IFE basal compared to ORS

```{r ifn_ors_ifeb_our, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```


* violin plots of DE genes in ORS, between HS and HD

```{r ors_ifeb_vln_ors_our, fig.width = 2, fig.height = 3}
subsobj = subset(sobj, cell_type == "ORS")
features_oi = c("DUSP1", "DDIT4", "GABARAP", "ZNF302",
                "MIF", "LGALS7", "ARF5", "S100A9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
```

* violin plots of DE genes in IFE basal, between HS and HD

```{r ors_ifeb_vln_ifeb_our, fig.width = 2, fig.height = 3}
subsobj = subset(sobj, cell_type == "IFE basal")
features_oi = c("DUSP1", "KLF6", "CLDN1", "CTGF",
                "IFI27", "IFITM3", "CCL2", "S100A9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
```


### Wu dataset

We select the dataset:

```{r ors_ifeb_wu}
sobj_atlas = list_data$wu_atlas$sobj
name2D_atlas = list_data$wu_atlas$name2D
sobj = list_data$wu_ors_ifeb$sobj
name2D = list_data$wu_ors_ifeb$name2D
list_results = list_data$wu_ors_ifeb$list_results
```

Location of cells on the whole atlas:

```{r ors_ifeb_location_wu, fig.width = 2, fig.height = 2}
sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

* GSEA, enriched in ORS compared to IFE basal

```{r kera_ors_ifeb_wu, fig.width = 6, fig.height = 4}
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

* GSEA, enriched in IFE basal compared to ORS

```{r ifn_ors_ifeb_wu, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

### Takahashi dataset

We select the dataset:

```{r ors_ifeb_takahashi}
sobj_atlas = list_data$takahashi_atlas$sobj
name2D_atlas = list_data$takahashi_atlas$name2D
sobj = list_data$takahashi_ors_ifeb$sobj
name2D = list_data$takahashi_ors_ifeb$name2D
list_results = list_data$takahashi_ors_ifeb$list_results
```

Location of cells on the whole atlas:

```{r ors_ifeb_location_takahashi, fig.width = 2, fig.height = 2}
sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cell_type")],
                                             by = "cell_bc")[, "cell_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

* GSEA, enriched in ORS compared to IFE basal

```{r kera_ors_ifeb_takahashi, fig.width = 6, fig.height = 4}
the_gs_name = "REACTOME_KERATINIZATION" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

* GSEA, enriched in IFE basal compared to ORS

```{r ifn_ors_ifeb_takahashi, fig.width = 6, fig.height = 4}
the_gs_name = "HALLMARK_INTERFERON_GAMMA_RESPONSE" 
the_content = list_results$ORS_vs_IFE_basal$gsea@result %>%
  dplyr::filter(ID == the_gs_name)
the_subtitle = paste0("\nNES : ", round(the_content$NES, 2),
                      "\t|\tpvalue : ", round(the_content$pvalue, 4),
                      "\t|\tset size : ", the_content$setSize, " genes")

enrichplot::gseaplot2(x = list_results$ORS_vs_IFE_basal$gsea,
                      geneSetID = the_gs_name) +
  ggplot2::labs(title = the_gs_name,
                subtitle = the_subtitle) +
  ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold",
                                           margin = ggplot2::margin(3, 3, 5, 3)),
                 plot.subtitle = ggtext::element_markdown(hjust = 0.5,
                                                          size = 10))
```

## HF-SCs

We select the dataset:

```{r hfscs_our}
sobj = list_data$hfsc$sobj
name2D = list_data$hfsc$name2D
list_results = list_data$hfsc$list_results
```

* violin plots of DE genes in HF-SCs, between HS and HD

```{r hfscs_vln_ors_our, fig.width = 2, fig.height = 3}
features_oi = c("HLA-C", "HLA-A", "IFITM3", "MIF", "JUNB", "PPIA",
                "PDK4", "DDIT4", "BTG2", "TXNIP", "KLF9")

# Plot !
plot_list = lapply(features_oi, FUN = function(feature) {
  # t-test between sample type
  feature_expr = sobj@assays$RNA@data[feature, ]
  feature_hs = feature_expr[sobj$sample_type == "HS"]
  feature_hd = feature_expr[sobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(sobj, group.by = "sample_type",
                      pt.size = 0.3, features = feature) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   legend.position = "none")
  
  return(p)
})

names(plot_list) = features_oi
plot_list
```
* heatmap

Heatmap between HS and HD :

```{r hfscs_heatmap_our, fig.width = 5.5, fig.height = 10}
# features_oi = list_results$HS_vs_HD %>%
#   dplyr::filter(pct.1 > 0.5 | pct.2 > 0.5) %>%
#   dplyr::arrange(-avg_logFC, -pct.1, -pct.2) %>%
#   rownames()
# features_oi = features_oi[!grepl(features_oi, pattern = "^RP")]

features_oi = c("IFITM3", "MIF", "HLA-C", "LMCD1", "TGFB2", "CYP1B1",
                "KRT6B", "WIF1", "HSPA1A", "COMT", "CISD1", "PRNP",  "CD81",
                "HLA-A", "B2M", "PPIA", "PYCARD", "CSAD", "PFN1", "HIF1A",
                "BASP1", "CAV1", "ZNF302", "LMO4", "SEPT11", "HTRA1",
                "GPM6A", "SOX4", "ZFP36L2", "RHOB", "CLDN1", "DUSP1", "TBX1",
                "ATF3", "DDIT4", "TXNIP", "BTG2", "SGK1", "KLF6", "LGR5")

# Matrix
mat_expr = Seurat::GetAssayData(sobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = cbind(mat_expr, sobj$percent.mt)
colnames(mat_expr)[ncol(mat_expr)] = "percent.rb"
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells

## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# list_colors[["seurat_clusters"]] = setNames(aquarius::gg_color_hue(length(levels(sobj$seurat_clusters))),
#                                             nm = levels(sobj$seurat_clusters))
# Cells order
column_order = sobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(sobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = sobj$sample_type,
                           sample_identifier = sobj$sample_identifier,
                           # clusters = sobj$seurat_clusters,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]
                                      # clusters = list_colors[["seurat_clusters"]]
                           ))


# g1 : REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM
# g2 : GOBP_APOPTOTIC_PROCESS
g1_genes = c("B2M", "HLA-C", "HLA-A", "MIF", "PPIA", "JUNB", "IFITM3")
g2_genes = c("JUN", "ATF3", "BTG2", "RHOB", "NFKBIA", "SGK1", "KLF9",
             "CAV1", "DDIT4", "PDK4", "TXNIP", "RNF1152", "TLE1")
ha_right = data.frame(genes =  c(features_oi, "percent.rb"), rownames = c(features_oi, "percent.rb"))
ha_right$group = case_when(ha_right$genes %in% g1_genes ~ "REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
                           ha_right$genes %in% g2_genes ~ "GOBP_APOPTOTIC_PROCESS",
                           TRUE ~ "others")

list_colors[["group"]] = setNames(
  nm = c("REACTOME_CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM", "GOBP_APOPTOTIC_PROCESS", "others"),
  c("red", "black", "gray90"))

ha_right = HeatmapAnnotation(group = ha_right$group,
                             col = list(group = list_colors[["group"]]),
                             which = "row",
                             show_annotation_name = FALSE,
                             show_legend = TRUE)

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             right_annotation = ha_right,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             show_row_dend = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 10, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")
```

## Immune cells

We select the datasets:

```{r immune_dataset_our}
sobj_atlas = list_data$our_atlas$sobj
name2D_atlas = list_data$our_atlas$name2D
sobj = list_data$immune$sobj
name2D = list_data$immune$name2D

cell_type_of_interest = c("CD4 T cells", "CD8 T cells", "Langerhans cells",
                          "macrophages", "B cells", "proliferative")
```

Location of cells on the whole atlas:

```{r immune_location_our, fig.width = 2, fig.height = 2}
sobj_atlas$cell_bc = colnames(sobj_atlas)
sobj$cell_bc = colnames(sobj)
sobj_atlas$is_of_interest = dplyr::left_join(sobj_atlas@meta.data[, c("cell_bc", "percent.mt")],
                                             sobj@meta.data[, c("cell_bc", "cluster_type")],
                                             by = "cell_bc")[, "cluster_type"]

Seurat::DimPlot(sobj_atlas, reduction = name2D_atlas, pt.size = 0.000001,
                group.by = "is_of_interest", order = "TRUE") +
  ggplot2::scale_color_manual(values = c(color_markers[cell_type_of_interest], bg_color),
                              breaks = c(cell_type_of_interest, NA), na.value = bg_color) +
  ggplot2::theme(aspect.ratio = 1,
                 plot.title = element_blank()) +
  Seurat::NoAxes() + Seurat::NoLegend()
```

* cell type annotation

```{r cell_type_immune_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cell_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* cluster type annotation

```{r cluster_type_immune_our, fig.width = 4, fig.height = 4}
Seurat::DimPlot(sobj, reduction = name2D, pt.size = 0.5,
                group.by = "cluster_type", cols = color_markers) +
  ggplot2::theme(aspect.ratio = 1) +
  Seurat::NoAxes() +
  Seurat::NoLegend()
```

* feature plots for genes related to TCR alpha

```{r gene_immune_our, fig.width = 12, fig.height = 8}
features_oi = c("CD3E", "CD4", "CD8A",
                sort(grep(rownames(sobj), pattern = "^TRA[C|V]", value = TRUE)))

plot_list = lapply(features_oi, FUN = function(one_gene) {
  p = Seurat::FeaturePlot(sobj, features = one_gene,
                          reduction = name2D, order = TRUE) +
    ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
    ggplot2::theme(aspect.ratio = 1) +
    Seurat::NoAxes() +
    Seurat::NoLegend()
  
  return(p)
})

patchwork::wrap_plots(plot_list, nrow = 4)
```

* feature plots split by sample type (HS / HD)

```{r gene_split, immune_our, fig.width = 4, fig.height = 2}
features_oi = data.frame(population = c("all immune cells", rep("macrophages", 2),
                                      rep("CD4 T cells", 3), rep("CD8 T cells", 2)),
                         feature = c("IL6", "IL1B", "TNF", "GZMA", "IFNG", "IL17A", "PRF1", "GZMB"))
features_oi

patchwork_list = lapply(features_oi$feature, FUN = function(one_gene) {
  plot_list = aquarius::plot_split_dimred(sobj,
                                          reduction = name2D,
                                          split_by = "sample_type",
                                          color_by = one_gene,
                                          order = TRUE,
                                          bg_color = bg_color)
  
  p = patchwork::wrap_plots(plot_list, nrow = 1) +
    patchwork::plot_layout(guides = "collect")
  
  return(p)
})
names(patchwork_list) = features_oi$feature

patchwork_list
```

* violin plots within a specific population

```{r gene_split_immune_our, fig.width = 2, fig.height = 3}
plot_list = lapply(c(1:nrow(features_oi)), FUN = function(i) {
  one_gene = features_oi$feature[i]
  population = features_oi$population[i]
  
  # Subset object
  if (population != "all immune cells") {
    subsobj = subset(sobj, cluster_type %in% population)
  } else {
   subsobj = sobj 
  }
  
  # t-test between sample type
  feature_expr = subsobj@assays$RNA@data[one_gene, ]
  feature_hs = feature_expr[subsobj$sample_type == "HS"]
  feature_hd = feature_expr[subsobj$sample_type == "HD"]
  feature_hs_VS_feature_hd = stats::t.test(feature_hs, feature_hd)
  feature_hs_VS_feature_hd
  
  # Significance associated with p-value
  pval = feature_hs_VS_feature_hd$p.value
  
  significance = case_when(pval > 0.05 ~ "ns",
                           pval > 0.01 & pval <= 0.05 ~ "*",
                           pval > 0.001 & pval <= 0.01 ~ "**",
                           pval <= 0.001 ~ "***")
  
  # Plot
  p = Seurat::VlnPlot(subsobj, group.by = "sample_type",
                      pt.size = 0.3, features = one_gene) +
    ggplot2::scale_fill_manual(values = sample_type_colors,
                               breaks = names(sample_type_colors)) +
    ggplot2::labs(title = population,
                  subtitle = one_gene) +
    # Significance bar
    ggplot2::geom_segment(data = data.frame(1), inherit.aes = FALSE,
                          size = 0.5,
                          x = 1, xend = 2,
                          y = max(feature_expr)+0.3, yend = max(feature_expr)+0.3) +
    ggplot2::geom_label(data = data.frame(1), inherit.aes = FALSE,
                        x = 1.5, y = max(feature_expr)+0.35,
                        label = significance,
                        fill = NA, label.size = 0) +
    ggplot2::lims(y = c(0, max(feature_expr)+0.4)) +
    # Theme
    ggplot2::theme(axis.title.x = element_blank(),
                   plot.title = element_text(size = 10),
                   plot.subtitle = element_text(hjust = 0.5),
                   legend.position = "none")
  
  return(p)
})
names(plot_list) = features_oi$feature

plot_list
```


* heatmap macrophages

```{r heatmap_immune_macrophages, fig.width = 5, fig.height = 5.2}
subsobj = subset(sobj, cluster_type == "macrophages")
features_oi = c("IL1B", "TNF",
                "HLA-DQA2", "HLA-DPA1", "HLA-DRB5",
                "HLA-A", "HLA-C", "B2M",
                "C1QA", "C1QB", "C1QC")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom")
```

* heatmap T cells

```{r heatmap_immune_t_cells, fig.width = 6, fig.height = 4}
subsobj = subset(sobj, cluster_type == "CD4 T cells")
features_oi = c("GZMA", "KLRB1", "BTG1", "ZFP36", "NFKBIA", "TXNIP", "CXCR4", "IFNG", "IL17A")

# Matrix
mat_expr = Seurat::GetAssayData(subsobj)
mat_expr = mat_expr[features_oi, ]
mat_expr = Matrix::t(mat_expr)
mat_expr = dynutils::scale_quantile(mat_expr) # between 0 and 1
mat_expr = Matrix::t(mat_expr)
dim(mat_expr) # genes x cells
## Colors
list_colors = list()

# Heatmap
list_colors[["expression"]] = rev(RColorBrewer::brewer.pal(name = "RdBu", n = 9))

# Sample annotation (top annotation)
list_colors[["sample_type"]] = sample_type_colors
list_colors[["sample_identifier"]] = setNames(nm = sample_info$sample_identifier,
                                              sample_info$color)
# Cells order
column_order = subsobj@meta.data %>%
  dplyr::arrange(sample_type, sample_identifier) %>%
  rownames()
column_order = match(column_order, rownames(subsobj@meta.data))

# Heatmap
ha_top = HeatmapAnnotation(sample_type = subsobj$sample_type,
                           sample_identifier = subsobj$sample_identifier,
                           col = list(sample_type = list_colors[["sample_type"]],
                                      sample_identifier = list_colors[["sample_identifier"]]))

# Heatmap
ht = Heatmap(as.matrix(mat_expr),
             heatmap_legend_param = list(title = "Expression", at = c(0, 1), 
                                         labels = c("low", "high")),
             col = list_colors[["expression"]],
             top_annotation = ha_top,
             show_column_names = FALSE,
             column_order = column_order,
             column_gap = unit(2, "mm"),
             cluster_rows = FALSE,
             row_title = NULL,
             row_names_gp = grid::gpar(fontsize = 14, fontface = "plain"),
             use_raster = FALSE,
             show_heatmap_legend = TRUE,
             border = TRUE)

ComplexHeatmap::draw(ht,
                     merge_legend = TRUE,
                     heatmap_legend_side = "left",
                     annotation_legend_side = "left")
```

# Supplementary tables

In this section, we save files to associate with the manuscript, as supplementary tables.

## Table S2 (package version)

We load the table :

```{r ts2_load}
package_version = read.table(paste0(".", "/data/info_to_install_2023_04_17.txt"),
                             header = TRUE)
head(package_version)
```

Columns correspond to :

* **order** : order to install package such that one never need to install dependencies
* **package_name** : package name
* **version** : installed version, on the local machine
* **url** : the package was installed in the Singularity container using this url

We save this table, except the last column (just for me) :

```{r ts2_save}
openxlsx::write.xlsx(package_version[, c(1:4)],
                     file = paste0(".", "/data/Supplementary Table 2.xlsx"))
```

## Table S3 (cell type annotation)

We load the table :

```{r ts3_load}
cell_markers = readRDS(paste0(data_dir, "/1_metadata/hs_hd_cell_markers.rds"))
lengths(cell_markers)
```

We save this table, except the last column (just for me) :

```{r ts3_save}
openxlsx::write.xlsx(cell_markers,
                     file = paste0(".", "/data/Supplementary Table 3.xlsx"))
```

# R Session

```{r sessioninfo, echo = FALSE, fold_output = TRUE}
sessionInfo()
```
